We collected nearshore samples from southern Southeast Alaska and amplified the samples using MiFish primer (12S) and quantified using MiSeq Illumina
Goals for this script is to explore the read data - maybe represent it as species proportions by site (although we know that this raw data is not correct in terms of proportions so maybe we skip that).
After exploring species read counds by sites, maybe look at community composition (using presence and absence)
Finally lets compare the species occurences between seine data and eDNA by site
libraries
library(plyr)
library(dplyr)
library(tidyverse)
library(vegan)
library(glue)
library(ggrepel)
edna <- read.csv("/Users/liadomke/Desktop/Grad School/UAF/Data/Chp3_SeascapeDynamics/Data/eDNA_tax_filtered_3-24-22.csv")
loc <- read.csv("/Users/liadomke/Desktop/Grad School/UAF/Data/Chp3_SeascapeDynamics/Data/chp3_sites.csv")
seine <- read.csv("/Users/liadomke/Desktop/Grad School/UAF/Data/Chp3_SeascapeDynamics/Data/fish_beach_seine_2021_1-5-21.csv")
edna.minus.control <- read.csv("/Users/liadomke/Desktop/Grad School/UAF/Data/Chp3_SeascapeDynamics/Data/eDNA_tax_filtered_minus_controls_4-15-22.csv", stringsAsFactors = FALSE, header = TRUE)
visualize eDNA reads
glimpse(edna)
## Rows: 2,021
## Columns: 12
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ sample.PCR <chr> "e00005-A", "e00005-A", "e00005-A", "e00005-A", "e0000…
## $ sample <chr> "e00005", "e00005", "e00005", "e00005", "e00005", "e00…
## $ rep <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B",…
## $ count <int> 39, 1678, 1542, 435, 451, 33, 6, 20, 2, 111, 3205, 307…
## $ taxon <chr> "Clupeidae", "Anoplopoma_fimbria", "Sebastidae", "Gadi…
## $ taxonomic_level <chr> "family", "species", "family", "family", "species", "g…
## $ SampleCode <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Sample_label <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Sample_rep <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Sample_date <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Extraction_date <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
unique(edna$taxon)
## [1] "Clupeidae" "Anoplopoma_fimbria"
## [3] "Sebastidae" "Gadidae"
## [5] "Anarrhichthys_ocellatus" "Hexagrammos"
## [7] "Oncorhynchus" "Hexagrammos_decagrammus"
## [9] "Bathymaster" "Stichaeidae"
## [11] "Enophrys_diceraus" "Homo_sapiens"
## [13] "Liparis" "Salmonidae"
## [15] "Cottidae" "Pleuronectidae"
## [17] "Pholis" "Icelinus_borealis"
## [19] "Cymatogaster_aggregata" "Gasterosteus"
## [21] "Clinocottus_acuticeps" "Actinopteri"
## [23] "Pleuronichthys" "Polypera_greeni"
## [25] "Citharichthys_stigmaeus" "Ammodytidae"
## [27] "Oligocottus" "Syngnathus_leptorhynchus"
## [29] "Anoplarchus" "Aulorhynchus_flavidus"
## [31] "Bos" "Apodichthys_flavidus"
## [33] "Enophrys_bison" "Hemilepidotus"
## [35] "Chitonotus_pugetensis" "Gobiesox_maeandricus"
## [37] "Artedius_lateralis" "Canis_lupus"
## [39] "Artedius_fenestralis" "Artedius_harringtoni"
## [41] "Sus" "Oxylebius_pictus"
## [43] "Rhamphocottus_richardsonii" "Oncorhynchus_tshawytscha"
## [45] "Nautichthys_oculofasciatus" "Gasterosteus_aculeatus"
## [47] "Lepidogobius_lepidus" "Liparis_fucensis"
## [49] "Brachyistius_frenatus" "Megaptera_novaeangliae"
## [51] "Embiotocidae" "Ovis"
## [53] "Synchirus_gilli" "Ophiodon_elongatus"
## [55] "Enhydra_lutris" "Oncorhynchus_kisutch"
## [57] "Citharichthys_sordidus" "Brosmophycis_marginata"
## [59] "Leptocottus_armatus" "Agonidae"
## [61] "Cypriniformes"
remove any sample ID that are not part of my sampling in SESO
edna.red <- edna %>%
filter(sample != "e00005" & sample != "e00007" & sample != "e00010" & sample != "e00011" &
sample != "e00012" & sample != "e00013" & sample != "e00014" & sample != "e00015" &
sample != "e00016" & sample != "e00017") %>%
select(-X)
edna.red$binary <- ifelse(edna.red$count > 0, 1, 0)
add in habitat information
Divide by habitat type
sites <- c("Sylburn harbor eel", "Sylburn harbor kelp", "Reef harbor eel", "Reef harbor kelp", "Kah shakes eel", "Kah shakes kelp", "Dall bay eel", "Dall bay kelp",
"Moira sound eel", "Moira sound kelp", "Nichols bay eel", "Nichols bay kelp", "Tah bay eel", "Hunter bay eel", "Klawock airport eel", "Big Tree bay eel", "Clam island mixed", "Cole island mixed", "Bostwick inlet eel", "Bostwick inlet kelp")
habitat <- c("eelgrass", "kelp", "eelgrass", "kelp",
"eelgrass", "kelp", "eelgrass", "kelp",
"eelgrass", "kelp", "eelgrass", "kelp",
"mixed eelgrass", "eelgrass", "eelgrass",
"eelgrass", "mixed eelgrass",
"mixed eelgrass", "eelgrass", "kelp")
bay_id <- c("SYLB_A", "SYLB_B", "REEF_A", "REEF_B", "KAHS_A", "KAHS_B", "DALL_A", "DALL_B",
"MOIR_A", "MOIR_B", "NICH_A", "NICH_B", "TAHB_A", "HUNT_A", "KLWA_A", "BTRB_A",
"CLAM_A", "NFEI_B", "BOST_A", "BOST_B")
chp3 <- data.frame(sites, habitat)
edna.red.hab <- left_join(chp3, edna.red, by = c("sites" = "Sample_label"))
# change to P/A
# sppAbun[sppAbun > 0] <- 1 #converts from abundance to P/A
# sppAbun
If we’re interested in accounting for the field controls - subtract the controls by site, from the ASVs collected, this was done in the script “taxonomic_filtering_eDNA2021.Rmd”
glimpse(edna.minus.control)
## Rows: 1,555
## Columns: 11
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ sample.PCR <chr> "e00289-A", "e00289-A", "e00289-B", "e00289-B", …
## $ sample <chr> "e00289", "e00289", "e00289", "e00289", "e00289"…
## $ taxon <chr> "Pholis", "Oncorhynchus", "Salmonidae", "Cymatog…
## $ taxonomic_level <chr> "genus", "genus", "family", "species", "genus", …
## $ SampleCode <chr> "2021_SYLB_A_1", "2021_SYLB_A_1", "2021_SYLB_A_1…
## $ Sample_label <chr> "Sylburn harbor eel", "Sylburn harbor eel", "Syl…
## $ Sample_rep <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, …
## $ Sample_date <chr> "6/8/21", "6/8/21", "6/8/21", "6/8/21", "6/8/21"…
## $ Extraction_date <chr> "11/18/21", "11/18/21", "11/18/21", "11/18/21", …
## $ counts_minus_controls <int> 0, 1110, 0, 2326, 870, 1, 2937, 752, 364, 1, 228…
# add in habitat data, like above
edna.nocontrol.hab <- left_join(chp3, edna.minus.control, by = c("sites" = "Sample_label")) %>%
dplyr::select(-X)
# rotate the dataframe to wide (like below) and create a binary column
edna.nocontrol.hab$binary <- ifelse(edna.nocontrol.hab$counts_minus_controls > 0, 1, 0)
# binary wide
minuscontrol.wide.bin <- edna.nocontrol.hab %>%
pivot_wider(id_cols = -c(taxonomic_level, counts_minus_controls), names_from = "taxon", values_from = "binary", values_fill = 0) %>%
distinct()
# make the binary long again better for plotting
minuscontrol.long.bin <- minuscontrol.wide.bin %>%
pivot_longer(cols = Pholis:Agonidae, names_to = "taxon", values_to = "binary")
# count-based wide then long
minuscontrol.wide.counts <- edna.nocontrol.hab %>%
pivot_wider(id_cols = -c(taxonomic_level, binary), names_from = "taxon", values_from = "counts_minus_controls", values_fill = 0) %>%
distinct()
minuscontrol.long.counts <- minuscontrol.wide.counts %>%
pivot_longer(cols = Pholis:Agonidae, names_to = "taxon", values_to = "counts_minus_controls")
rotate data frame to wide for dataframe that still includes the control information
edna.wide <- edna.red.hab %>%
pivot_wider(id_cols = -c(taxonomic_level, binary), names_from = "taxon", values_from = "count", values_fill = 0) %>%
distinct()
edna.bin.wide <- edna.red.hab %>%
pivot_wider(id_cols = -c(taxonomic_level, count), names_from = "taxon", values_from = "binary",
values_fill = 0) %>%
distinct()
# take the binary wide dataframe and make it long again for plotting
edna.hab.long <- pivot_longer(edna.bin.wide, cols = Pholis:Agonidae, names_to = "taxon", values_to = "binary")
Figure 1: Detection and count of taxon across the eelgrass sites where we have eDNA data and includes controls
The graphs above indicate relative consistency species occurrence (presence) across the technical replicates and even the field replicates. However, there are a few confusing exceptions. In particular Sylburn harbor eelgrass has extremely low species occurence. Samples 1 - A is composed of Oncorhynchus and Pholis, sample 1- B is Cymatogaster_aggregata, Oncorhynchus, 1-C has oncorhnynchus, and Clupeidae. Sample 2 ABC has nearly nothing with A only having Citharichthys_stigmaeus, Ammodytidae, Oligocottus, and Oncorhynchus Sample 3 ABC added hexagrammous to the mix of species present (with others)
Besides Sylburn harbour eel, the rest of the sites look decide. Some variability in what is present in the controls for each site. Most have very few counts or none in the field control and technical triplicates. However this is not the case for Kah shakes and Reef harbor eel.
Questions
How should we account for the control? Do we subtract the control species from the the presence/absence in the field replicates? Do we substract the ASV read counts of the control from each field sample? <- doing this may be a bad idea because if there was little genetic material in the control and lots of PCR material (nucleotides etc) then it would artificially make the ASV counts of the genetic material there greater than if it was present in a sample with lots of genetic material.
How do we deal with the technical triplicates? Previously (w/ Ole) we talked about using them to express the stochasity of the data. How do we express a confidence interval w/ presence/absence data?
Figure 2: Detection and count of taxon across the understory kelp sites where we have eDNA data and includes controls
These graphs are similar to the eelgrass sites. In this case the sites that have questionable results are: moira sound kelp - with very few ASV counts across the replicates, Nichols bay kelp - almost no species present in the 3rd field replicate, Sylburn harbor klep - with not very consistent results across the field replicates and various species present in the control samples.
Lets look at what these sites look like when graphed with the ASV summed across the technical replicates
Figure 3: Detection and count of taxon across the eelgrass sites where we have eDNA data and includes controls. This sums across technical replicates
Figure 4: Heat map of the presence/absence of taxon present in eelgrass meadows.
This isn’t that helpful especially since the taxon are hard to read, we need to do a couple things : - combine species that are in the same group (so Oncorhynchus should be combined with species specific instances of Oncorhynchus)
unique(edna.hab.long$taxon)
## [1] "Pholis" "Oncorhynchus"
## [3] "Salmonidae" "Cymatogaster_aggregata"
## [5] "Clupeidae" "Citharichthys_stigmaeus"
## [7] "Ammodytidae" "Oligocottus"
## [9] "Hexagrammos" "Syngnathus_leptorhynchus"
## [11] "Cottidae" "Anoplarchus"
## [13] "Stichaeidae" "Homo_sapiens"
## [15] "Aulorhynchus_flavidus" "Gadidae"
## [17] "Bos" "Sebastidae"
## [19] "Clinocottus_acuticeps" "Apodichthys_flavidus"
## [21] "Enophrys_bison" "Actinopteri"
## [23] "Polypera_greeni" "Hemilepidotus"
## [25] "Chitonotus_pugetensis" "Pleuronectidae"
## [27] "Hexagrammos_decagrammus" "Gasterosteus"
## [29] "Gobiesox_maeandricus" "Artedius_lateralis"
## [31] "Canis_lupus" "Artedius_fenestralis"
## [33] "Artedius_harringtoni" "Sus"
## [35] "Anoplopoma_fimbria" "Oxylebius_pictus"
## [37] "Rhamphocottus_richardsonii" "Oncorhynchus_tshawytscha"
## [39] "Nautichthys_oculofasciatus" "Gasterosteus_aculeatus"
## [41] "Lepidogobius_lepidus" "Liparis_fucensis"
## [43] "Brachyistius_frenatus" "Megaptera_novaeangliae"
## [45] "Embiotocidae" "Ovis"
## [47] "Synchirus_gilli" "Ophiodon_elongatus"
## [49] "Pleuronichthys" "Enhydra_lutris"
## [51] "Oncorhynchus_kisutch" "Citharichthys_sordidus"
## [53] "Brosmophycis_marginata" "Leptocottus_armatus"
## [55] "Agonidae"
edna.hab.long$taxon <- as.factor(edna.hab.long$taxon)
levels(edna.hab.long$taxon)[levels(edna.hab.long$taxon)=="Oncorhynchus_kisutch"] <- "Oncorhynchus"
levels(edna.hab.long$taxon)[levels(edna.hab.long$taxon)=="Hexagrammos_decagrammus"] <- "Hexagrammos"
levels(edna.hab.long$taxon)[levels(edna.hab.long$taxon)=="Gasterosteus_aculeatus"] <- "Gasterosteus"
levels(edna.hab.long$taxon)[levels(edna.hab.long$taxon)=="Oncorhynchus_tshawytscha"] <- "Oncorhynchus"
# Oligocottus genus of sculpins
# Cottidae family of sculpins
# Anoplarchus genus of cockscombs
# Stichaeidae family of pricklebacks
# Apodichthys_flavidus penpoint gunnel
# Actinopteri - class of ray finned fishes general
# Polypera_greeni Lopefine snailfish
# Hemilepidotus - genus of sclupins (irish lords)
# Pleuronectidae family of right eye flounders
# Pleuronectidae is a genus within this family
# Gobiesox_maeandricus - northern clingfish
# Oxylebius_pictus painted greenling
# Nautichthys_oculofasciatus - sailfin sculpin
# Ophiodon_elongatus lingcod
# filter out canis_lupus, homo_sapiens, megapstera_novaeangliae, enhydra_lutris, Genus Sus (domestic pig), Genus Bos (beef),
# Genus Ovis (lamb)
edna.fish.long <- edna.hab.long %>%
filter(taxon != "Canis_lupus" & taxon != "Homo_sapiens" & taxon != "Megaptera_novaeangliae" &
taxon != "Enhydra_lutris" & taxon != "Sus" & taxon != "Bos" & taxon != "Ovis")
unique(edna.fish.long$taxon) # now 51 unique occurrences
## [1] Pholis Oncorhynchus
## [3] Salmonidae Cymatogaster_aggregata
## [5] Clupeidae Citharichthys_stigmaeus
## [7] Ammodytidae Oligocottus
## [9] Hexagrammos Syngnathus_leptorhynchus
## [11] Cottidae Anoplarchus
## [13] Stichaeidae Aulorhynchus_flavidus
## [15] Gadidae Sebastidae
## [17] Clinocottus_acuticeps Apodichthys_flavidus
## [19] Enophrys_bison Actinopteri
## [21] Polypera_greeni Hemilepidotus
## [23] Chitonotus_pugetensis Pleuronectidae
## [25] Gasterosteus Gobiesox_maeandricus
## [27] Artedius_lateralis Artedius_fenestralis
## [29] Artedius_harringtoni Anoplopoma_fimbria
## [31] Oxylebius_pictus Rhamphocottus_richardsonii
## [33] Nautichthys_oculofasciatus Lepidogobius_lepidus
## [35] Liparis_fucensis Brachyistius_frenatus
## [37] Embiotocidae Synchirus_gilli
## [39] Ophiodon_elongatus Pleuronichthys
## [41] Citharichthys_sordidus Brosmophycis_marginata
## [43] Leptocottus_armatus Agonidae
## 51 Levels: Actinopteri Agonidae Ammodytidae Anoplarchus ... Syngnathus_leptorhynchus
edna.fish.long$taxon <- as.character(edna.fish.long$taxon)
# need to combine the presence/absence with the renaming of the levels of taxon
edna.fish.long <- distinct(edna.fish.long)
# this includes the control in the counts so its not right, need to remove the control, then summarize, add control back in, and graph
# remove control binary and taxon at sites
control.eel <- edna.fish.long %>%
filter(habitat == "eelgrass" & Sample_rep == "control") %>%
distinct()
# creates list of fish species without any occurrences
no.occ.eel <- edna.fish.long %>%
filter(habitat == "eelgrass" & Sample_rep != "control") %>%
group_by(taxon) %>%
dplyr::summarise(occurrence = sum(binary)) %>%
subset(occurrence == "0") %>%
dplyr::select(-occurrence)
#no.occ.eel <- as.vector(no.occ$taxon)
## `summarise()` has grouped output by 'sites'. You can override using the
## `.groups` argument.
Figure 5: Heat map of the presence/absence of taxon present in eelgrass meadows with field and technical replicates together and excluding controls
Looking at this graph, what is the most useful about it? This doesn’t subtract out the field controls. Based on some conversations with Pat Berry we can try and subtract the ASVs counts in the control from (each? I think) each field sample. This should be done on the basis of the ASV - not the species identification.
This graph is a combination of the field and lab replicates. I think about the lab replicates as an expression of the stochasicity present in each field replicate – so in my ideal world I would express each field replicate individually plus or minus some certainty. However, since this data is binary this doesn’t make sense. Instead maybe re-graph each field replicate with occurrence (0-3) based on the lab replciates
# this includes the control in the counts so its not right, need to remove the control, then summarize, add control back in, and graph
# remove control binary and taxon at sites
control.kelp <- edna.fish.long %>%
filter(habitat == "kelp" & Sample_rep == "control") %>%
distinct()
# creates list of fish species without any occurrences
no.occ.kelp <- edna.fish.long %>%
filter(habitat == "kelp" & Sample_rep != "control") %>%
group_by(taxon) %>%
dplyr::summarise(occurrence = sum(binary)) %>%
subset(occurrence == "0") %>%
dplyr::select(-occurrence)
## `summarise()` has grouped output by 'sites'. You can override using the
## `.groups` argument.
Figure 6: Heat map of the presence/absence of taxon present in understory kelp with field and technical replciates together and excluding controls.
Oligocottus genus of sculpins
Cottidae family of sculpins
Anoplarchus genus of cockscombs
Stichaeidae family of pricklebacks
Apodichthys_flavidus penpoint gunnel
Actinopteri - class of ray finned fishes general
Polypera_greeni Lopefine snailfish
Hemilepidotus - genus of sclupins (irish lords)
Pleuronectidae family of right eye flounders
Pleuronectidae is a genus within this family
Gobiesox_maeandricus - northern clingfish
Oxylebius_pictus painted greenling
Nautichthys_oculofasciatus - sailfin sculpin
Ophiodon_elongatus lingcod
In the taxonomic filtering edna 2021 RMD script, I subtracted the counts in the controls (summed across lab replicates) from each field replicate
This does not include the negative and positive controls (or even the DNA extraction controls)
# data without the control
glimpse(minuscontrol.long.counts)
## Rows: 9,342
## Columns: 10
## $ sites <chr> "Sylburn harbor eel", "Sylburn harbor eel", "Syl…
## $ habitat <chr> "eelgrass", "eelgrass", "eelgrass", "eelgrass", …
## $ sample.PCR <chr> "e00289-A", "e00289-A", "e00289-A", "e00289-A", …
## $ sample <chr> "e00289", "e00289", "e00289", "e00289", "e00289"…
## $ SampleCode <chr> "2021_SYLB_A_1", "2021_SYLB_A_1", "2021_SYLB_A_1…
## $ Sample_rep <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Sample_date <chr> "6/8/21", "6/8/21", "6/8/21", "6/8/21", "6/8/21"…
## $ Extraction_date <chr> "11/18/21", "11/18/21", "11/18/21", "11/18/21", …
## $ taxon <chr> "Pholis", "Oncorhynchus", "Salmonidae", "Cymatog…
## $ counts_minus_controls <int> 0, 1110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
glimpse(minuscontrol.long.bin)
## Rows: 9,342
## Columns: 10
## $ sites <chr> "Sylburn harbor eel", "Sylburn harbor eel", "Sylburn h…
## $ habitat <chr> "eelgrass", "eelgrass", "eelgrass", "eelgrass", "eelgr…
## $ sample.PCR <chr> "e00289-A", "e00289-A", "e00289-A", "e00289-A", "e0028…
## $ sample <chr> "e00289", "e00289", "e00289", "e00289", "e00289", "e00…
## $ SampleCode <chr> "2021_SYLB_A_1", "2021_SYLB_A_1", "2021_SYLB_A_1", "20…
## $ Sample_rep <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Sample_date <chr> "6/8/21", "6/8/21", "6/8/21", "6/8/21", "6/8/21", "6/8…
## $ Extraction_date <chr> "11/18/21", "11/18/21", "11/18/21", "11/18/21", "11/18…
## $ taxon <chr> "Pholis", "Oncorhynchus", "Salmonidae", "Cymatogaster_…
## $ binary <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
Lets graph by habitat types and summed ASV counts by species ## eelgrass by field_tech reps
minuscontrol.long.counts <- minuscontrol.long.counts %>%
separate(sample.PCR, into = c("sample.PCR", "rep"), sep = "-", remove = FALSE) %>%
unite(sample.PCR, sample.PCR:rep, sep = "-", remove = FALSE) # for wahtever reason you have to put it back together to keep column
# repeat with binary
minuscontrol.long.bin <- minuscontrol.long.bin %>%
separate(sample.PCR, into = c("sample.PCR", "rep"), sep = "-", remove = FALSE) %>%
unite(sample.PCR, sample.PCR:rep, sep = "-", remove = FALSE) # for wahtever reason you have to put it back together to keep column
Figure 7: Heat map of the presence/absence of taxon present in eelgrass meadows after subtracting the ASV counts present in the controls
Figure 8: Heat map of the presence/absence of taxon present in understory kelp after subtracting the ASV counts present in the controls
Combine the salmon species because we know the primer doesn’t do a great job of seperating them This will be for the heat maps with p/a
unique(minuscontrol.long.bin$taxon)
## [1] "Pholis" "Oncorhynchus"
## [3] "Salmonidae" "Cymatogaster_aggregata"
## [5] "Clupeidae" "Citharichthys_stigmaeus"
## [7] "Ammodytidae" "Oligocottus"
## [9] "Hexagrammos" "Bos"
## [11] "Syngnathus_leptorhynchus" "Gadidae"
## [13] "Sebastidae" "Stichaeidae"
## [15] "Anoplarchus" "Cottidae"
## [17] "Clinocottus_acuticeps" "Apodichthys_flavidus"
## [19] "Enophrys_bison" "Actinopteri"
## [21] "Polypera_greeni" "Hemilepidotus"
## [23] "Chitonotus_pugetensis" "Pleuronectidae"
## [25] "Hexagrammos_decagrammus" "Gobiesox_maeandricus"
## [27] "Artedius_lateralis" "Canis_lupus"
## [29] "Gasterosteus" "Artedius_fenestralis"
## [31] "Artedius_harringtoni" "Sus"
## [33] "Anoplopoma_fimbria" "Oxylebius_pictus"
## [35] "Aulorhynchus_flavidus" "Rhamphocottus_richardsonii"
## [37] "Oncorhynchus_tshawytscha" "Nautichthys_oculofasciatus"
## [39] "Lepidogobius_lepidus" "Liparis_fucensis"
## [41] "Brachyistius_frenatus" "Homo_sapiens"
## [43] "Gasterosteus_aculeatus" "Megaptera_novaeangliae"
## [45] "Embiotocidae" "Synchirus_gilli"
## [47] "Ophiodon_elongatus" "Pleuronichthys"
## [49] "Enhydra_lutris" "Oncorhynchus_kisutch"
## [51] "Citharichthys_sordidus" "Brosmophycis_marginata"
## [53] "Leptocottus_armatus" "Agonidae"
minuscontrol.long.bin$taxon <- as.factor(minuscontrol.long.bin$taxon)
levels(minuscontrol.long.bin$taxon)[levels(minuscontrol.long.bin$taxon)=="Oncorhynchus_kisutch"] <- "Oncorhynchus"
levels(minuscontrol.long.bin$taxon)[levels(minuscontrol.long.bin$taxon)=="Hexagrammos_decagrammus"] <- "Hexagrammos"
levels(minuscontrol.long.bin$taxon)[levels(minuscontrol.long.bin$taxon)=="Gasterosteus_aculeatus"] <- "Gasterosteus"
levels(minuscontrol.long.bin$taxon)[levels(minuscontrol.long.bin$taxon)=="Oncorhynchus_tshawytscha"] <- "Oncorhynchus"
# Oligocottus genus of sculpins
# Cottidae family of sculpins
# Anoplarchus genus of cockscombs
# Stichaeidae family of pricklebacks
# Apodichthys_flavidus penpoint gunnel
# Actinopteri - class of ray finned fishes general
# Polypera_greeni Lopefine snailfish
# Hemilepidotus - genus of sclupins (irish lords)
# Pleuronectidae family of right eye flounders
# Pleuronectidae is a genus within this family
# Gobiesox_maeandricus - northern clingfish
# Oxylebius_pictus painted greenling
# Nautichthys_oculofasciatus - sailfin sculpin
# Ophiodon_elongatus lingcod
# filter out canis_lupus, homo_sapiens, megapstera_novaeangliae, enhydra_lutris, Genus Sus (domestic pig), Genus Bos (beef),
# Genus Ovis (lamb)
edna.fish.long.bin <- minuscontrol.long.bin %>%
filter(taxon != "Canis_lupus" & taxon != "Homo_sapiens" &
taxon != "Sus" & taxon != "Bos" & taxon != "Ovis")
unique(edna.fish.long.bin$taxon) # now 46 unique occurrences
## [1] Pholis Oncorhynchus
## [3] Salmonidae Cymatogaster_aggregata
## [5] Clupeidae Citharichthys_stigmaeus
## [7] Ammodytidae Oligocottus
## [9] Hexagrammos Syngnathus_leptorhynchus
## [11] Gadidae Sebastidae
## [13] Stichaeidae Anoplarchus
## [15] Cottidae Clinocottus_acuticeps
## [17] Apodichthys_flavidus Enophrys_bison
## [19] Actinopteri Polypera_greeni
## [21] Hemilepidotus Chitonotus_pugetensis
## [23] Pleuronectidae Gobiesox_maeandricus
## [25] Artedius_lateralis Gasterosteus
## [27] Artedius_fenestralis Artedius_harringtoni
## [29] Anoplopoma_fimbria Oxylebius_pictus
## [31] Aulorhynchus_flavidus Rhamphocottus_richardsonii
## [33] Nautichthys_oculofasciatus Lepidogobius_lepidus
## [35] Liparis_fucensis Brachyistius_frenatus
## [37] Megaptera_novaeangliae Embiotocidae
## [39] Synchirus_gilli Ophiodon_elongatus
## [41] Pleuronichthys Enhydra_lutris
## [43] Citharichthys_sordidus Brosmophycis_marginata
## [45] Leptocottus_armatus Agonidae
## 50 Levels: Actinopteri Agonidae Ammodytidae Anoplarchus ... Syngnathus_leptorhynchus
edna.fish.long.bin$taxon <- as.character(edna.fish.long.bin$taxon)
# need to combine the presence/absence with the renaming of the levels of taxon
edna.fish.long.bin <- distinct(edna.fish.long.bin)
# creates list of fish species without any occurrences
no.occ.eel2 <- edna.fish.long.bin %>%
filter(habitat == "eelgrass" & Sample_rep != "control") %>%
group_by(taxon) %>%
dplyr::summarise(occurrence = sum(binary)) %>%
subset(occurrence == "0") %>%
dplyr::select(-occurrence)
# which ones are excluded?
no.occ.eel2
## # A tibble: 12 × 1
## taxon
## <chr>
## 1 Agonidae
## 2 Anoplopoma_fimbria
## 3 Brachyistius_frenatus
## 4 Brosmophycis_marginata
## 5 Citharichthys_sordidus
## 6 Enhydra_lutris
## 7 Gobiesox_maeandricus
## 8 Nautichthys_oculofasciatus
## 9 Ophiodon_elongatus
## 10 Oxylebius_pictus
## 11 Rhamphocottus_richardsonii
## 12 Synchirus_gilli
## `summarise()` has grouped output by 'sites'. You can override using the
## `.groups` argument.
Figure 9: Heat map of the presence/absence of taxon present in eelgrass meadows after subtracting the ASV counts present in the controls and reducing/combining similar or unrealiby primed species
# creates list of fish species without any occurrences
no.occ.kelp2 <- edna.fish.long.bin %>%
filter(habitat == "kelp" & Sample_rep != "control") %>%
group_by(taxon) %>%
dplyr::summarise(occurrence = sum(binary)) %>%
subset(occurrence == "0") %>%
dplyr::select(-occurrence)
# which ones are excluded?
no.occ.kelp2
## # A tibble: 13 × 1
## taxon
## <chr>
## 1 Agonidae
## 2 Brosmophycis_marginata
## 3 Chitonotus_pugetensis
## 4 Citharichthys_sordidus
## 5 Embiotocidae
## 6 Enhydra_lutris
## 7 Lepidogobius_lepidus
## 8 Leptocottus_armatus
## 9 Liparis_fucensis
## 10 Megaptera_novaeangliae
## 11 Ophiodon_elongatus
## 12 Pleuronichthys
## 13 Synchirus_gilli
## `summarise()` has grouped output by 'sites'. You can override using the
## `.groups` argument.
Figure 10: Heat map of the presence/absence of taxon present in understory kelp after subtracting the ASV counts present in the controls and reducing/combining similar or unrealiby primed species
unique(edna.fish.long.bin$habitat)
## [1] "eelgrass" "kelp" "mixed eelgrass"
no.occ.eel3 <- edna.fish.long.bin %>%
filter(sites == "Big Tree bay eel" | sites == "Clam island mixed" | sites == "Cole island mixed" |
sites == "Hunter bay eel" | sites == "Klawock airport eel" | sites == "Tah bay eel" & Sample_rep != "control") %>%
group_by(taxon) %>%
dplyr::summarise(occurrence = sum(binary)) %>%
subset(occurrence == "0") %>%
dplyr::select(-occurrence)
# which ones are excluded?
no.occ.eel3
## # A tibble: 10 × 1
## taxon
## <chr>
## 1 Anoplopoma_fimbria
## 2 Chitonotus_pugetensis
## 3 Embiotocidae
## 4 Gobiesox_maeandricus
## 5 Liparis_fucensis
## 6 Megaptera_novaeangliae
## 7 Nautichthys_oculofasciatus
## 8 Oxylebius_pictus
## 9 Polypera_greeni
## 10 Rhamphocottus_richardsonii
## `summarise()` has grouped output by 'sites'. You can override using the
## `.groups` argument.
Figure 11: Heat map of the presence/absence of taxon present in eelgrass or mixed eelgrass habitats after subtracting the ASV counts present in the controls and reducing/combining similar or unrealiby primed species
Okay so based on the graphs and exploration above, I’m going to use the dataframe that has the control samples subtracted by ASV from the field replicates. I have a couple goals for this section:
2.1 Explore community composition using multivariate approaches between eelgrass and understory kelp eDNA samples
2.2 Explore community composition using MV approaches between eelgrass and mixed eelgrass habitat
2.3 Add in associated beach seine data for the sites where its present
2.3.1 Compare within habitat community by beach seine and eelgrass (can this only be done using P/A for beach seine?)
Lets start with exploring the MV approaches with the eDNA samples
This is done at the site field replicate level (edna.bin.sp) and keeping the field and lab replicate as unique rows (edna.bin.samplecode_rep2). The rep2 drops four instances that have no genetic fish material.
# first lets take the df that we've cleaned up (no Bos etc in it) and turn it wide
edna.fish.wide <- edna.fish.long.bin %>%
separate(sample.PCR, into = c("sample.PCR", "rep"), sep = "-", remove = FALSE) %>%
unite(sample.PCR, sample.PCR:rep, sep = "-", remove = FALSE) %>%
unite(SampleCode_rep, c(SampleCode, rep)) %>%
group_by(SampleCode_rep, taxon) %>%
#summarize(counts = sum(binary)) %>%
#ungroup() %>%
#mutate(binary = ifelse(counts > 0, 1, 0)) %>%
pivot_wider(names_from = taxon,
values_from = binary, values_fn = sum) # values_fn = sum required because when we changed some levels of the taxon, there were repeat entries for unique rows (i.e. two entries for Oncorhyncus for e00289_A aka Sylburn_A_1_A). The sum adds them up so if it was 0,0 = 0 or 0,1 = 1. Have to check for instances of 1,1 = 2 cause that wont be valid in a binary form.
# above is by sample field replicate combining all tech replicates
# do this only if you're summarizing to field rep (no lab rep)
edna.bin.sp <- edna.fish.long.bin %>%
group_by(SampleCode, taxon) %>%
summarize(counts = sum(binary)) %>%
ungroup() %>%
mutate(binary = ifelse(counts > 0, 1, 0)) %>%
pivot_wider(names_from = taxon, id_cols = -counts,
values_from = binary, values_fn = sum) %>%
distinct() %>%
column_to_rownames(var = "SampleCode")
## `summarise()` has grouped output by 'SampleCode'. You can override using the
## `.groups` argument.
# for field and tech reps
edna.bin.samplecode_rep <- edna.fish.wide %>%
dplyr::select(c(SampleCode_rep, Pholis:Agonidae)) %>%
column_to_rownames(var = "SampleCode_rep")
# are there are field/lab rep combinations where no species were found?
sites_wo_reads <- edna.bin.samplecode_rep %>%
rownames_to_column(var = "SampleCode_rep") %>%
mutate(total = rowSums(select(., Pholis:Agonidae)))
# there are four sites/reps where no species were found
no_reads <- sites_wo_reads %>%
filter(total == 0) %>%
dplyr::select(SampleCode_rep)
edna.bin.samplecode_rep2 <- edna.bin.samplecode_rep %>%
rownames_to_column(var = "SampleCode_rep") %>%
anti_join(no_reads, by = c("SampleCode_rep")) %>%
column_to_rownames(var = "SampleCode_rep")
Extract just the site info
# site information
# separate the site information and the unnecessary columns
site.info.byrep <- edna.fish.wide %>%
anti_join(no_reads, by = "SampleCode_rep") %>%
dplyr::select(-c(Pholis:Agonidae)) %>%
distinct()
site.info.bysite <- edna.fish.long.bin %>%
dplyr::select(-c(sample.PCR, rep, taxon, binary)) %>%
distinct()
Focus is using jaccard distances because it works well with binary data and deals appropriate with the double zero issue (in that it doesn’t consider sites that are both missing species as more closely related).
# other binary distance dissimlarities - Dice, kluczynski, ochiai
# jaccard distance
edna.jacc <- vegan::vegdist(edna.bin.sp, method = "jaccard", binary = T)
plot(edna.jacc)
edna.jacc2 <- vegdist(edna.bin.samplecode_rep2, method = "jaccard", binary = T)
plot(edna.jacc2)
# or using the dist feature
# binary defaults to asymmetric binary, I dont know how this method deals with double zeros
edna.asybin <- dist(edna.bin.sp, method = "binary")
plot(edna.asybin)
# what happens when we use bray-curtis dis?
edna.bray <- vegdist(edna.bin.sp, method = "bray")
randomization test to compare within-group ‘dispersions’ with any distance measure. This tells you if the differences between sites are because of dispersion or variation or if it truly driven by a difference in sites (as can be determined by a PERMANOVA test)
H0 = within-group dispersion have no significant differences.
disp.hab <- betadisper(edna.jacc, site.info.bysite$habitat)
anova(disp.hab) # no within group dispersion by habitat
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 2 0.02136 0.010682 0.5701 0.5687
## Residuals 56 1.04930 0.018737
disp.sites <- betadisper(edna.jacc, site.info.bysite$sites)
## Warning in betadisper(edna.jacc, site.info.bysite$sites): some squared distances
## are negative and changed to zero
anova(disp.sites)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 19 0.65826 0.034645 1.4316 0.1683
## Residuals 39 0.94380 0.024200
# there is a difference of dispersion between habitat types (eel, mixed eel, and kelp)
adonis2(edna.jacc ~ habitat, data = site.info.bysite)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = edna.jacc ~ habitat, data = site.info.bysite)
## Df SumOfSqs R2 F Pr(>F)
## habitat 2 0.7272 0.05539 1.642 0.027 *
## Residual 56 12.4011 0.94461
## Total 58 13.1284 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(edna.jacc ~ habitat + sites, data = site.info.bysite)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = edna.jacc ~ habitat + sites, data = site.info.bysite)
## Df SumOfSqs R2 F Pr(>F)
## habitat 2 0.7272 0.05539 2.3138 0.002 **
## sites 17 6.2725 0.47778 2.3479 0.001 ***
## Residual 39 6.1287 0.46683
## Total 58 13.1284 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(edna.jacc ~ sites + Sample_rep, data = site.info.bysite)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = edna.jacc ~ sites + Sample_rep, data = site.info.bysite)
## Df SumOfSqs R2 F Pr(>F)
## sites 19 6.9997 0.53317 2.3194 0.001 ***
## Sample_rep 1 0.0929 0.00708 0.5850 0.905
## Residual 38 6.0358 0.45975
## Total 58 13.1284 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# okays os there's a lot of variability even between unique sites, but at least there isn't signifcant variation between field replciates....
disp.hab2 <- betadisper(edna.jacc2, site.info.byrep$habitat)
anova(disp.hab2) # no within group dispersion by hab any permanova analysis will be accurate
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 2 0.0361 0.018052 1.3921 0.2514
## Residuals 166 2.1526 0.012967
adonis2(edna.jacc2 ~ habitat, data = site.info.byrep) # true diff between habitat types
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = edna.jacc2 ~ habitat, data = site.info.byrep)
## Df SumOfSqs R2 F Pr(>F)
## habitat 2 3.293 0.07485 6.7157 0.001 ***
## Residual 166 40.705 0.92515
## Total 168 43.998 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Brief foray into nmds - Do I think metric or non-metric more appropriate for this kind of data?
edna.nmds <- metaMDS(edna.jacc2, k = 3, autotransform = FALSE, distance = "jaccard",
maxit = 200000)
## Run 0 stress 0.1755208
## Run 1 stress 0.176172
## Run 2 stress 0.1754966
## ... New best solution
## ... Procrustes: rmse 0.00593025 max resid 0.05406784
## Run 3 stress 0.1754958
## ... New best solution
## ... Procrustes: rmse 0.002003623 max resid 0.01473266
## Run 4 stress 0.1760355
## Run 5 stress 0.175494
## ... New best solution
## ... Procrustes: rmse 0.001832357 max resid 0.02048417
## Run 6 stress 0.1755176
## ... Procrustes: rmse 0.005419839 max resid 0.05386633
## Run 7 stress 0.1758325
## ... Procrustes: rmse 0.009985301 max resid 0.07115606
## Run 8 stress 0.1758642
## ... Procrustes: rmse 0.0108324 max resid 0.07036324
## Run 9 stress 0.1754945
## ... Procrustes: rmse 0.001404606 max resid 0.01615048
## Run 10 stress 0.1755213
## ... Procrustes: rmse 0.004989001 max resid 0.05401339
## Run 11 stress 0.1760343
## Run 12 stress 0.1754997
## ... Procrustes: rmse 0.002035949 max resid 0.0155682
## Run 13 stress 0.1754954
## ... Procrustes: rmse 0.001657403 max resid 0.01849674
## Run 14 stress 0.1755178
## ... Procrustes: rmse 0.005510849 max resid 0.05383216
## Run 15 stress 0.1754965
## ... Procrustes: rmse 0.001534093 max resid 0.01124987
## Run 16 stress 0.1759254
## ... Procrustes: rmse 0.01411875 max resid 0.07707812
## Run 17 stress 0.1763075
## Run 18 stress 0.175496
## ... Procrustes: rmse 0.001075296 max resid 0.01112405
## Run 19 stress 0.1758709
## ... Procrustes: rmse 0.01377683 max resid 0.07722039
## Run 20 stress 0.175875
## ... Procrustes: rmse 0.01105803 max resid 0.07090979
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
# with only 2 dimensions, the nmds didn't coverge and had stress over 20%
edna.nmds$stress
## [1] 0.175494
# with three dimensions
stressplot(edna.nmds) # this is with 3 dimension, 17% which is okay.
plot(edna.nmds$points)
scores(edna.nmds) %>%
as_tibble(rownames = "SampleCode_rep") %>%
inner_join(site.info.byrep) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = habitat)) +
geom_point()
## Joining, by = "SampleCode_rep"
scores(edna.nmds) %>%
as_tibble(rownames = "SampleCode_rep") %>%
inner_join(site.info.byrep) %>%
ggplot(aes(x = NMDS1, y = NMDS3, color = habitat)) +
geom_point()
## Joining, by = "SampleCode_rep"
scores(edna.nmds) %>%
as_tibble(rownames = "SampleCode_rep") %>%
inner_join(site.info.byrep) %>%
ggplot(aes(x = NMDS2, y = NMDS3, color = habitat)) +
geom_point()
## Joining, by = "SampleCode_rep"
Because its three dimensions, you need three nmds biplots to show all the data.
metric dimensional scaling - principal coordinates analysis
## trying pco or pcoa instead
pco.jacc <- cmdscale(edna.jacc)
colnames(pco.jacc) <- c("pcoa1", "pcoa2")
plot(pco.jacc)
pco.jacc %>%
as_tibble(rownames = "site_rep") %>%
ggplot(aes(x = pcoa1, y = pcoa2, color = site_rep)) +
geom_point()
# with by the site/lab rep
pco.jacc2 <- cmdscale(edna.jacc2, k = 2, eig = T, add = T)
colnames(pco.jacc2$points) <- c("pcoa1", "pcoa2")
# eiganvector - positions on the x and y axis (points)
# eiganvalues - indicate something about the proportion of the variation explained by the different axis. So we can use eiganvalues to get porpotion explained. Some of these eiganvalues are negative. Need to be rescaled (this is done by add = T)
# eiganvalue
pco.jacc2$eig # the % explained by axis 1 is the first value divided by the total of all the eiganvalues *100
## [1] 9.418245e+00 7.687394e+00 6.284328e+00 4.781093e+00 4.398711e+00
## [6] 4.384938e+00 3.834233e+00 3.319092e+00 3.054937e+00 2.827401e+00
## [11] 2.666692e+00 2.460855e+00 2.223211e+00 2.070871e+00 1.840878e+00
## [16] 1.689431e+00 1.651851e+00 1.521307e+00 1.321083e+00 1.255723e+00
## [21] 1.161111e+00 1.127508e+00 1.032665e+00 1.008386e+00 9.221189e-01
## [26] 8.561499e-01 7.912092e-01 7.032651e-01 5.861598e-01 5.454818e-01
## [31] 4.759691e-01 4.578212e-01 4.439191e-01 4.235516e-01 3.900656e-01
## [36] 3.768421e-01 3.731324e-01 3.076674e-01 2.912490e-01 2.738480e-01
## [41] 2.668085e-01 2.538221e-01 2.458901e-01 2.245208e-01 2.122595e-01
## [46] 1.947158e-01 1.818588e-01 1.762876e-01 1.695476e-01 1.656267e-01
## [51] 1.654952e-01 1.610277e-01 1.504242e-01 1.459950e-01 1.451106e-01
## [56] 1.339770e-01 1.287957e-01 1.255183e-01 1.218540e-01 1.152893e-01
## [61] 1.127251e-01 1.071096e-01 1.040992e-01 9.930245e-02 9.781347e-02
## [66] 9.220409e-02 8.860783e-02 8.535887e-02 8.430047e-02 8.128271e-02
## [71] 7.874789e-02 7.687874e-02 7.432777e-02 7.151337e-02 7.096586e-02
## [76] 6.970387e-02 6.856295e-02 6.766899e-02 6.625920e-02 6.201386e-02
## [81] 6.154367e-02 6.039003e-02 5.860613e-02 5.584851e-02 5.522972e-02
## [86] 5.354402e-02 5.234639e-02 5.126378e-02 5.002192e-02 4.896256e-02
## [91] 4.832931e-02 4.832931e-02 4.779363e-02 4.633251e-02 4.589692e-02
## [96] 4.496107e-02 4.472847e-02 4.355351e-02 4.298111e-02 4.208082e-02
## [101] 4.039268e-02 3.961277e-02 3.929020e-02 3.834494e-02 3.760925e-02
## [106] 3.744879e-02 3.684731e-02 3.660191e-02 3.636869e-02 3.585894e-02
## [111] 3.525055e-02 3.462335e-02 3.403580e-02 3.395768e-02 3.370631e-02
## [116] 3.324307e-02 3.306453e-02 3.262164e-02 3.193895e-02 3.169051e-02
## [121] 3.130162e-02 3.073200e-02 3.047962e-02 3.021021e-02 2.973662e-02
## [126] 2.953911e-02 2.875384e-02 2.858677e-02 2.820676e-02 2.769876e-02
## [131] 2.735734e-02 2.703473e-02 2.693055e-02 2.662891e-02 2.618943e-02
## [136] 2.585116e-02 2.552691e-02 2.519886e-02 2.500942e-02 2.421721e-02
## [141] 2.413799e-02 2.364218e-02 2.349831e-02 2.261751e-02 2.242767e-02
## [146] 2.211643e-02 2.153412e-02 2.111910e-02 2.068012e-02 2.038043e-02
## [151] 1.995317e-02 1.955489e-02 1.936211e-02 1.854824e-02 1.809719e-02
## [156] 1.764758e-02 1.748303e-02 1.663955e-02 1.602889e-02 1.547998e-02
## [161] 1.477243e-02 1.391673e-02 1.279243e-02 1.214712e-02 7.892128e-03
## [166] 7.176301e-03 5.138693e-03 -3.017496e-16 -1.602541e-14
percent_explained <- 100* pco.jacc2$eig/ sum(pco.jacc2$eig)
pe <- format(round(percent_explained[1:2], digits = 1), nsmall = 1, trim = T)
labs <- c(glue("PCoA 1 ({pe[1]}%)"),
glue("PcoA 2 ({pe[2]}%)"))
ordiplot(pco.jacc2, display = "sites")
ordiellipse(pco.jacc2, site.info.byrep$habitat)
Figure 12: Principal coordinates analysis of the environmental DNA presence/absence data. Each dog represents an individual field sample and technical replicate instance. Points are colored by habitat that they were sampled in
PCoA of all the habitat types: hard to see any clear patterns or clustering of the data points by habitat type. Each point is a unique combination of the field sample plus the lab replicate. So if we sampled 3 times in the field at a site and then did 3 lab replicates of each field sample it means that at each site we have 9 non control samples. However, since not every sample/lab replicate had genetic material in it I have to drop some of those replciates. Overall there were 169 rows of unique site:field rep:tech rep instances that went into the PCoA and distance calculation (using jaccard distances).
Still working on understanding if the same analytical approaches are valid for pcoa based data, but using the beta disper function, I determined that the assumption of homogeneity of variance holds and that the permanova test of habitat found that there are significant differences between fish communities in different habitat.
Using a PCoA we can determine how much variation in the data is explained when flattening this multidimensional data into two dimensions, approximately 18% of the data is explained.
To see the cumulative increase in variance explained increases with increased number of axes. Percent of the variance explained by each ordination axis when using the principal coordinates analysis.
# create scree plot that shows the percent explained
tibble(pe = cumsum(percent_explained),
axis = 1:length(percent_explained)) %>%
ggplot(aes(x = axis, y = pe)) +
geom_line() +
coord_cartesian(xlim = c(1,15), ylim = c(0, 75))
Just under 20% of the variation in the data are explained by the first and second axes of PCoA. This isn’t a ton but kinda to be expected when working with highly variable ecological data. To explain approximately 50% of the variation in the data you would need to visualize the data in 8 dimensions. Which my wee brain can’t comprehend.
#head(edna.bin.samplecode_rep2)
head(site.info.byrep)
## # A tibble: 6 × 8
## # Groups: SampleCode_rep [6]
## sites habitat sample SampleCode_rep sample.PCR Sample_rep Sample_date
## <chr> <chr> <chr> <chr> <chr> <int> <chr>
## 1 Sylburn harbo… eelgra… e00289 2021_SYLB_A_1… e00289-A 1 6/8/21
## 2 Sylburn harbo… eelgra… e00289 2021_SYLB_A_1… e00289-B 1 6/8/21
## 3 Sylburn harbo… eelgra… e00289 2021_SYLB_A_1… e00289-C 1 6/8/21
## 4 Sylburn harbo… eelgra… e00290 2021_SYLB_A_2… e00290-A 2 6/8/21
## 5 Sylburn harbo… eelgra… e00291 2021_SYLB_A_3… e00291-A 3 6/8/21
## 6 Sylburn harbo… eelgra… e00291 2021_SYLB_A_3… e00291-B 3 6/8/21
## # … with 1 more variable: Extraction_date <chr>
# subset dataframe to just the eelgrass
eel.samplecode_rep2 <- edna.bin.samplecode_rep2 %>%
rownames_to_column(var = "SampleCode_rep") %>%
inner_join(site.info.byrep) %>%
filter(habitat != "kelp") %>%
dplyr::select(c(SampleCode_rep, Pholis:Agonidae)) %>%
column_to_rownames(var = "SampleCode_rep")
## Joining, by = "SampleCode_rep"
# filter the site info too
eel.site.byrep <- site.info.byrep %>%
filter(habitat != "kelp")
# calculate distance matrix
eel.jacc <- vegdist(eel.samplecode_rep2, method = "jaccard", binary = T)
randomization test to compare within-group ‘dispersions’ with any distance measure. This tells you if the differences between sites are because of dispersion or variation or if it truly driven by a difference in sites (as can be determined by a PERMANOVA test)
H0 = within-group dispersion have no significant differences.
disp.eel.hab <- betadisper(eel.jacc, eel.site.byrep$habitat)
anova(disp.eel.hab)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.02214 0.022136 1.4832 0.2259
## Residuals 109 1.62682 0.014925
# no within group dispersion by habitat
adonis2(eel.jacc ~ habitat, data = eel.site.byrep)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = eel.jacc ~ habitat, data = eel.site.byrep)
## Df SumOfSqs R2 F Pr(>F)
## habitat 1 1.0684 0.03913 4.4386 0.001 ***
## Residual 109 26.2373 0.96087
## Total 110 27.3058 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# okay so there are a difference between habitat, but could this just be the fact that these sites are super variable to begin with and this is actually driven by the variation in sites?
# test this
disp.eel.sites <- betadisper(eel.jacc, eel.site.byrep$sites)
anova(disp.eel.sites) # no within sites variability
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 12 0.2881 0.024009 1.5255 0.1278
## Residuals 98 1.5423 0.015738
adonis2(eel.jacc ~ habitat + sites, data = eel.site.byrep)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = eel.jacc ~ habitat + sites, data = eel.site.byrep)
## Df SumOfSqs R2 F Pr(>F)
## habitat 1 1.0684 0.03913 6.9471 0.001 ***
## sites 11 11.1655 0.40891 6.6000 0.001 ***
## Residual 98 15.0719 0.55197
## Total 110 27.3058 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(eel.jacc ~ sites, data = eel.site.byrep)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = eel.jacc ~ sites, data = eel.site.byrep)
## Df SumOfSqs R2 F Pr(>F)
## sites 12 12.234 0.44803 6.6289 0.001 ***
## Residual 98 15.072 0.55197
## Total 110 27.306 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# both between group significant difference between sites and habitats.
adonis2(eel.jacc ~ habitat + sites + Sample_rep, data = eel.site.byrep)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = eel.jacc ~ habitat + sites + Sample_rep, data = eel.site.byrep)
## Df SumOfSqs R2 F Pr(>F)
## habitat 1 1.0684 0.03913 6.9753 0.001 ***
## sites 11 11.1655 0.40891 6.6269 0.001 ***
## Sample_rep 1 0.2143 0.00785 1.3991 0.153
## Residual 97 14.8576 0.54412
## Total 110 27.3058 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Can the sites within habitat be considered replicates if they are significantly diff communities based on the eDNA field/tech replicates? At least the field replicates aren’t significantly different from each other.
looking at a metric approach
set.seed(15)
eel.nmds <- metaMDS(eel.jacc, k = 2, autotransform = FALSE, distance = "jaccard")
## Run 0 stress 0.2244486
## Run 1 stress 0.2244473
## ... New best solution
## ... Procrustes: rmse 0.0003962287 max resid 0.002339052
## ... Similar to previous best
## Run 2 stress 0.232157
## Run 3 stress 0.2328959
## Run 4 stress 0.2257184
## Run 5 stress 0.2256706
## Run 6 stress 0.2329274
## Run 7 stress 0.2403252
## Run 8 stress 0.2407788
## Run 9 stress 0.2245631
## ... Procrustes: rmse 0.0104074 max resid 0.08417811
## Run 10 stress 0.244189
## Run 11 stress 0.2243761
## ... New best solution
## ... Procrustes: rmse 0.004554862 max resid 0.0370087
## Run 12 stress 0.2244546
## ... Procrustes: rmse 0.0111772 max resid 0.1012627
## Run 13 stress 0.2247642
## ... Procrustes: rmse 0.01264607 max resid 0.1044135
## Run 14 stress 0.226008
## Run 15 stress 0.2243748
## ... New best solution
## ... Procrustes: rmse 0.00094231 max resid 0.005698233
## ... Similar to previous best
## Run 16 stress 0.2404102
## Run 17 stress 0.2432638
## Run 18 stress 0.2243766
## ... Procrustes: rmse 0.0009157378 max resid 0.008754532
## ... Similar to previous best
## Run 19 stress 0.2244484
## ... Procrustes: rmse 0.004491009 max resid 0.03493617
## Run 20 stress 0.2248043
## ... Procrustes: rmse 0.00903137 max resid 0.06921389
## *** Solution reached
eel.nmds$stress # uff it converged but the stress is 22%
## [1] 0.2243748
stressplot(eel.nmds) #bleck
visualize it
scores(eel.nmds) %>%
as_tibble(rownames = "SampleCode_rep") %>%
inner_join(eel.site.byrep) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = habitat)) +
geom_point(size = 4) #+
## Joining, by = "SampleCode_rep"
#stat_ellipse() # is the default stat_ellipse what I want?
Regardless of how this plot look/turns out, the stress is too high to reliably represent these data in only 2 dimensions.
I think potentially the better approach is to visualize this data in two dimensions with PCoA rather than nmds because it would require 3 dimensions to better visualize it.
pco.eel <- cmdscale(eel.jacc, k = 2, eig = T, add = T)
colnames(pco.eel$points) <- c("pcoa1", "pcoa2")
# eiganvector - positions on the x and y axis (points)
# eiganvalues - indicate something about the proportion of the variation explained by the different axis. So we can use eiganvalues to get porpotion explained. Some of these eiganvalues are negative. Need to be rescaled (this is done by add = T)
percent_explained2 <- 100* pco.eel$eig/ sum(pco.eel$eig)
pe2 <- format(round(percent_explained2[1:2], digits = 1), nsmall = 1, trim = T)
labs.eel <- c(glue("PCoA 1 ({pe2[1]}%)"),
glue("PcoA 2 ({pe2[2]}%)"))
pco.eel$points %>%
as_tibble(rownames = "SampleCode_rep") %>%
left_join(eel.site.byrep, by = "SampleCode_rep") %>%
ggplot(aes(x = pcoa1, y = pcoa2, color = sites)) +
geom_point() + labs(x = labs.eel[1], y = labs.eel[2])
Figure 13: Principal coordinates analysis of the environmental DNA presence/absence data for just eelgrass and mixed eelgrass habitat. Each dot represents an individual field sample and technical replicate instance. Points are colored by habitat that they were sampled in
Slightly more of the variation in the data is explained by the two axes (19% over the 18% with all sites).
scree plot
# create scree plot that shows the percent explained
tibble(pe2 = cumsum(percent_explained2),
axis = 1:length(percent_explained2)) %>%
ggplot(aes(x = axis, y = pe2)) +
geom_line() +
coord_cartesian(xlim = c(1,15), ylim = c(0, 75))
Okay overall, the eDNA is arguably potentially more variable than seine data (or variable in a different way). Things I’m unsure with how to deal:
Should I be combining the unique data from the technical replciates together? There are instances (that occur frequently) where one technical replicate will have different species occurrence than the other technical replicates.
Why do the technical replicates end up so different? Aren’t they supposed to be exact replicates mainly used for looking at any lab variability?
We dealt with the controls by subtracting the control read counts from the ASV present in each of the field/technical replciates - Is this the appropriate way of doing this? This doesn’t account for the proportional/compositional nature of the data that forces the sum to 1 requirement. In the control, there are likely less genetic material so its more likely (imo) for that material to be artifically exponenetially inflatted.
in each unique eDNA field/tech replciate unit we found between 0 - 17 species occurrences. This seems somewhat low compared to the beach seine data. Whats happening here?
Next step in the analysis is to add in the beach seine data, but in order to compare it to the eDNA data we need to match up the species/taxonomic groups. This is going to take some data manipulation
read in seine data - how many species did we find per seine?
what groups can we group the seine data into to best match the eDNA data, for example I know we’ll need to group the eDNA data for artedius sculpins into artedius in general because we have low confidence in identifying our lil’ artedius sculpins in the field.
calculate P/A for beach seinee and combine with eDNA
run MV approaches
head(seine)
## bay_code bay_sample place_name habitat so_region date YYYYMMDD
## 1 SYLB A Sylburn harbor eelgrass none 6/8/2021 20210608
## 2 SYLB A Sylburn harbor eelgrass none 6/8/2021 20210608
## 3 SYLB A Sylburn harbor eelgrass none 6/8/2021 20210608
## 4 SYLB A Sylburn harbor eelgrass none 6/8/2021 20210608
## 5 SYLB A Sylburn harbor eelgrass none 6/8/2021 20210608
## 6 SYLB A Sylburn harbor eelgrass none 6/8/2021 20210608
## start_time end_time slope tide_height tide_time species_common
## 1 600 630 steep -0.5 632 Pink salmon
## 2 600 630 steep -0.5 632 Pink salmon
## 3 600 630 steep -0.5 632 Chum salmon
## 4 600 630 steep -0.5 632 Snake prickleback
## 5 600 630 steep -0.5 632 Snake prickleback
## 6 600 630 steep -0.5 632 Snake prickleback
## species_scientific sp_code fork_length unmeasured unmeasured_sm
## 1 Oncorhynchus gorbuscha SALPINK 62 NA NA
## 2 Oncorhynchus gorbuscha SALPINK 98 NA NA
## 3 Oncorhynchus keta SALCHUM 115 NA NA
## 4 Lumpenus sagitta PRICKSN 138 NA NA
## 5 Lumpenus sagitta PRICKSN 145 NA NA
## 6 Lumpenus sagitta PRICKSN 149 NA NA
## taxon Sex notes FL_checking same.
## 1 vertebrate 62 TRUE
## 2 vertebrate 98 TRUE
## 3 vertebrate 115 TRUE
## 4 vertebrate 138 TRUE
## 5 vertebrate 145 TRUE
## 6 vertebrate 149 TRUE
unique(seine$unmeasured) # there are some unmeasured fishes
## [1] NA 17 20 3 5 183 11 1 967 1126 65 612 26 2 92
## [16] 217 110 4 411 32 59 10 88 22 55 48 24 196 86 134
## [31] 71 44 53 198 97 61 19 35 116 142 1270 126 21 222 266
## [46] 480 9
unique(seine$unmeasured_sm)
## [1] NA 10 306 61 29 107 39 42 32
unique(seine$species_scientific) # have 64 unique speices approximately
## [1] "Oncorhynchus gorbuscha" "Oncorhynchus keta"
## [3] "Lumpenus sagitta" "Anoplarchus insignis"
## [5] "Syngnathus leptorhynchus" "Sebastes sp"
## [7] "Hexagrammos lagocephalus" "Hexagrammos stelleri"
## [9] "Pholis laeta" "Cymatogaster aggregata"
## [11] "Leptocottus armatus" "Cottidae"
## [13] "Myoxocephalus polyacanthocephalus" "Artedius spp."
## [15] "Pugettia producta, Majidae" "Temessus cheiragonus"
## [17] "Cancer productus, Cancridae" "Sebastes caurinus"
## [19] "Sebastes maliger" "Oncorhynchus kisutch"
## [21] "Brachyistius frenatus" "Oxylebius pictus"
## [23] "Rhinogobiops nicholsii" "Enophrys bison"
## [25] "Gadus macrocephalus" "Blepsias cirrhosus"
## [27] "Ophiodon elongatus" "Nautichthys oculofasciatus"
## [29] "" "Limanda aspera"
## [31] "Parophrys vetulus" "Aulorhynchus flavidus"
## [33] "Podothecus accipenserinus" "Apodichthys flavidus"
## [35] "Citharichthys sordidus" "Pleuronectidae"
## [37] "Ammodytes personatus" "Lepidopsetta spp."
## [39] "Metacarcinus magister" "Pleuronichthys coenosus"
## [41] "Hemilepidotus hemilepidotus" "Hemilepidotus sp."
## [43] "Gasterosteus aculeatus" "Hexagrammos decagrammus"
## [45] "Hexagrammos octogrammus" "Citharichthys stigmaeus"
## [47] "Division Teleostei" "Parstichopus californicus"
## [49] "Clupea pallasii" "Metacarcinus gracilis"
## [51] "Salvelinus malma" "Ronquilus jordani"
## [53] "Sebastes melanops" "Lepidogobius lepidus"
## [55] "Platichthys stellatus" "Pallasina barbata"
## [57] "Oncorhynchus nerka" "Artedius harringtoni"
## [59] "Scorpaenidae" "Artedius fenestralis"
## [61] "Odontopyxis trispinosa" "Sebastes auriculatus"
## [63] "Hemilepidotus spinosus" "Oncorhynchus clarkii"
unique(seine$place_name)
## [1] "Sylburn harbor" "Reef harbor" "Kah shakes"
## [4] "Dall Bay" "Moira Sound" "Reef Harbor"
## [7] "Nichols Bay" "Tah Bay" "Hunter Bay"
## [10] "Port Refugio" "North Fish Egg Island" "Shinaku Inlet"
## [13] "Nossuk Bay" "Guktu Bay" "Natzuhini Bay"
## [16] "Bostwick Inlet"
# calculate total abundance
seine$abundance <- as.numeric(ifelse(is.na(seine$fork_length), ifelse(is.na(seine$unmeasured), paste(seine$unmeasured_sm), paste(seine$unmeasured)), 1))
## Warning: NAs introduced by coercion
# fish taxon for an entry of shiner perch
seine$taxon <- as.factor(seine$taxon)
levels(seine$taxon)[levels(seine$taxon)==""] <- "vertebrate"
seine$taxon <- as.character(seine$taxon)
# join in the habitat information and site names
loc.chp3 <- cbind(chp3, bay_id)
seine2 <- seine %>%
filter(taxon == "vertebrate") %>%
unite(bay_id, bay_code:bay_sample)
seine2$bay_id <- trimws(seine2$bay_id)
sgwd <- c("REFU_A", "NEFI_A", "NOSK_A", "SHIN_A", "NATZ_A", "GUKT_A")
fish <- seine2 %>%
dplyr::select(-c(FL_checking:same., so_region, fork_length, Sex, unmeasured, unmeasured_sm, notes)) %>%
subset(!(bay_id %in% sgwd)) %>%
left_join(loc.chp3, by = c("bay_id")) %>%
rename(habitat = habitat.y) %>%
dplyr::select(-habitat.x)
unique(fish$bay_id) # 16 seine sites - consistent
## [1] "SYLB_A" "SYLB_B" "REEF_A" "KAHS_A" "KAHS_B" "DALL_A" "MOIR_B" "DALL_B"
## [9] "MOIR_A" "REEF_B" "NICH_B" "NICH_A" "TAHB_A" "HUNT_A" "BOST_A" "BOST_B"
unique(fish$species_scientific) # 48 species for the seines, when we add back fourmissing sites now 51 species
## [1] "Oncorhynchus gorbuscha" "Oncorhynchus keta"
## [3] "Lumpenus sagitta" "Anoplarchus insignis"
## [5] "Syngnathus leptorhynchus" "Sebastes sp"
## [7] "Hexagrammos lagocephalus" "Hexagrammos stelleri"
## [9] "Pholis laeta" "Cymatogaster aggregata"
## [11] "Leptocottus armatus" "Cottidae"
## [13] "Myoxocephalus polyacanthocephalus" "Artedius spp."
## [15] "Sebastes caurinus" "Sebastes maliger"
## [17] "Oncorhynchus kisutch" "Brachyistius frenatus"
## [19] "Oxylebius pictus" "Rhinogobiops nicholsii"
## [21] "Enophrys bison" "Gadus macrocephalus"
## [23] "Blepsias cirrhosus" "Ophiodon elongatus"
## [25] "Nautichthys oculofasciatus" "Limanda aspera"
## [27] "Parophrys vetulus" "Aulorhynchus flavidus"
## [29] "Podothecus accipenserinus" "Apodichthys flavidus"
## [31] "Citharichthys sordidus" "Pleuronectidae"
## [33] "Ammodytes personatus" "Lepidopsetta spp."
## [35] "Pleuronichthys coenosus" "Hemilepidotus hemilepidotus"
## [37] "Hemilepidotus sp." "Gasterosteus aculeatus"
## [39] "Hexagrammos decagrammus" "Hexagrammos octogrammus"
## [41] "Citharichthys stigmaeus" "Division Teleostei"
## [43] "Clupea pallasii" "Salvelinus malma"
## [45] "Ronquilus jordani" "Sebastes melanops"
## [47] "Lepidogobius lepidus" "Platichthys stellatus"
## [49] "Pallasina barbata" "Oncorhynchus nerka"
## [51] "Oncorhynchus clarkii"
names(edna.bin.samplecode_rep2)
## [1] "Pholis" "Oncorhynchus"
## [3] "Salmonidae" "Cymatogaster_aggregata"
## [5] "Clupeidae" "Citharichthys_stigmaeus"
## [7] "Ammodytidae" "Oligocottus"
## [9] "Hexagrammos" "Syngnathus_leptorhynchus"
## [11] "Gadidae" "Sebastidae"
## [13] "Stichaeidae" "Anoplarchus"
## [15] "Cottidae" "Clinocottus_acuticeps"
## [17] "Apodichthys_flavidus" "Enophrys_bison"
## [19] "Actinopteri" "Polypera_greeni"
## [21] "Hemilepidotus" "Chitonotus_pugetensis"
## [23] "Pleuronectidae" "Gobiesox_maeandricus"
## [25] "Artedius_lateralis" "Gasterosteus"
## [27] "Artedius_fenestralis" "Artedius_harringtoni"
## [29] "Anoplopoma_fimbria" "Oxylebius_pictus"
## [31] "Aulorhynchus_flavidus" "Rhamphocottus_richardsonii"
## [33] "Nautichthys_oculofasciatus" "Lepidogobius_lepidus"
## [35] "Liparis_fucensis" "Brachyistius_frenatus"
## [37] "Megaptera_novaeangliae" "Embiotocidae"
## [39] "Synchirus_gilli" "Ophiodon_elongatus"
## [41] "Pleuronichthys" "Enhydra_lutris"
## [43] "Citharichthys_sordidus" "Brosmophycis_marginata"
## [45] "Leptocottus_armatus" "Agonidae"
We need to group the eDNA/seine data into greater genus/family depending on the situation
# Oncorhynchus/Salmonidae - seine
Salmon <- c("Oncorhynchus gorbuscha", "Oncorhynchus keta", "Salvelinus malma", "Oncorhynchus nerka", "Oncorhynchus kisutch",
"Oncorhynchus clarkii")
## eDNA
eSalmonidae <- c("Salmonidae", "Oncorhynchus")
# family Embiotocidae - seine
Embiotocidae <- c("Brachyistius frenatus", "Cymatogaster aggregata")
# eDNA
eEmbiotocidae <- c("Brachyistius_frenatus", "Cymatogaster_aggregata")
# Clupidae (family) - seine
Clupidae <- c("Clupea pallasii")
# Ammodytidae - seine
Ammodytidae <- "Ammodytes personatus"
# Cottidae - seine
Cottidae <- c("Leptocottus armatus", "Cottidae", "Myoxocephalus polyacanthocephalus",
"Artedius spp.", "Enophrys bison", "Hemilepidotus sp.", "Hemilepidotus hemilepidotus")
## eDNA
eCottidae <- c("Clinocottus_acuticeps", "Chitonotus_pugetensis", "Artedius_fenestralis", "Artedius_harringtoni",
"Artedius_lateralis", "Enophrys_bison", "Leptocottus_armatus", "Synchirus_gilli", "Hemilepidotus",
"Oligocottus")
# Gadidae - seine
Gadidae <- "Gadus macrocephalus"
# Hexagrammidae - seine
Hexagrammidae <- c("Hexagrammos decagrammus", "Hexagrammos octogrammus", "Hexagrammos lagocephalus",
"Hexagrammos stelleri", "Ophiodon elongatus", "Oxylebius pictus")
## eDNA
eHexagrammidae <- c("Hexagrammos", "Oxylebius_pictus", "Ophiodon_elongatus")
# Sebastes sp - seine
Sebastidae <- c("Sebastes sp", "Sebastes caurinus", "Sebastes maliger", "Sebastes melanops")
# Anoplarchus insignis - slender cockscomb - seine
Stichaeidae <- c("Anoplarchus insignis", "Lumpenus sagitta")
## eDNA
eStichaeidae <- c("Anoplarchus", "Stichaeidae")
# Pholis - seine, family
Pholidae <- c("Pholis laeta", "Apodichthys flavidus", "Pholis")
## eDNA pholis
ePholidae <- c("Pholis", "Apodichthys_flavidus")
# Agonidae (Poachers) - seine
Agonidae <- c("Pallasina barbata", "Podothecus accipenserinus")
## eDNA
# areAgnoidae
## Flatfishes ## seine
# Citharichthys right now keep as species specific found in both seine and eDNA
# change the seine data
# right eyed flatfishes
Pleuronectidae <- c("Parophrys vetulus", "Pleuronichthys coenosus", "Limanda aspera", "Lepidopsetta spp.", "Platichthys stellatus")
## edna
ePleuronectidae <- c("Pleuronichthys", "Pleuronectidae")
# three spines- Gasterosteus
Gasterosteus <- c("Gasterosteus aculeatus")
# higher order
Teleostei <- c("Division Teleostei")
eTeleostei <- c("Actinopteri")
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Salmon, "Salmonidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Embiotocidae, "Embiotocidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Clupidae, "Clupidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Ammodytidae, "Ammodytidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Cottidae, "Cottidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Gadidae, "Gadidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Hexagrammidae, "Hexagrammidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Sebastidae, "Sebastidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Pholidae, "Pholidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Stichaeidae, "Stichaeidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Agonidae, "Agonidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Pleuronectidae, "Pleuronectidae"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Gasterosteus, "Gasterosteus"))
fish$species_scientific <- sapply(fish["species_scientific"], function(x) replace(x, x %in% Teleostei, "Teleostei"))
fish$species_scientific <- fish$species_scientific[,1]
str(fish)
## 'data.frame': 2594 obs. of 16 variables:
## $ bay_id : chr "SYLB_A" "SYLB_A" "SYLB_A" "SYLB_A" ...
## $ place_name : chr "Sylburn harbor" "Sylburn harbor" "Sylburn harbor" "Sylburn harbor" ...
## $ date : chr "6/8/2021" "6/8/2021" "6/8/2021" "6/8/2021" ...
## $ YYYYMMDD : int 20210608 20210608 20210608 20210608 20210608 20210608 20210608 20210608 20210608 20210608 ...
## $ start_time : int 600 600 600 600 600 600 600 600 600 600 ...
## $ end_time : int 630 630 630 630 630 630 630 630 630 630 ...
## $ slope : chr "steep" "steep" "steep" "steep" ...
## $ tide_height : num -0.5 -0.5 -0.5 -0.5 -0.5 -0.5 -0.5 -0.5 -0.5 -0.5 ...
## $ tide_time : int 632 632 632 632 632 632 632 632 632 632 ...
## $ species_common : chr "Pink salmon" "Pink salmon" "Chum salmon" "Snake prickleback" ...
## $ species_scientific: chr "Salmonidae" "Salmonidae" "Salmonidae" "Stichaeidae" ...
## $ sp_code : chr "SALPINK" "SALPINK" "SALCHUM" "PRICKSN" ...
## $ taxon : chr "vertebrate" "vertebrate" "vertebrate" "vertebrate" ...
## $ abundance : num 1 1 1 1 1 1 1 1 1 1 ...
## $ sites : chr "Sylburn harbor eel" "Sylburn harbor eel" "Sylburn harbor eel" "Sylburn harbor eel" ...
## $ habitat : chr "eelgrass" "eelgrass" "eelgrass" "eelgrass" ...
fish$species_scientific <- as.factor(fish$species_scientific)
# change the eDNA data
edna.long <- edna.bin.sp %>%
rownames_to_column(var = "SampleCode_rep") %>%
dplyr::select(-c(Enhydra_lutris, Megaptera_novaeangliae)) %>%
pivot_longer(cols = Actinopteri:Syngnathus_leptorhynchus, names_to = "species_scientific", values_to = "binary")
edna.long$species_scientific <- sapply(edna.long["species_scientific"], function(x) replace(x, x %in% eSalmonidae, "Salmonidae"))
edna.long$species_scientific <- sapply(edna.long["species_scientific"], function(x) replace(x, x %in% eEmbiotocidae, "Embiotocidae"))
edna.long$species_scientific <- sapply(edna.long["species_scientific"], function(x) replace(x, x %in% eCottidae, "Cottidae"))
edna.long$species_scientific <- sapply(edna.long["species_scientific"], function(x) replace(x, x %in% eHexagrammidae, "Hexagrammidae"))
edna.long$species_scientific <- sapply(edna.long["species_scientific"], function(x) replace(x, x %in% ePholidae, "Pholidae"))
edna.long$species_scientific <- sapply(edna.long["species_scientific"], function(x) replace(x, x %in% eStichaeidae, "Stichaeidae"))
edna.long$species_scientific <- sapply(edna.long["species_scientific"], function(x) replace(x, x %in% ePleuronectidae, "Pleuronectidae"))
edna.long$species_scientific <- sapply(edna.long["species_scientific"], function(x) replace(x, x %in% eTeleostei, "Teleostei"))
edna.long$species_scientific <- edna.long$species_scientific[,1]
str(edna.long)
## tibble [2,596 × 3] (S3: tbl_df/tbl/data.frame)
## $ SampleCode_rep : chr [1:2596] "2021_BOST_A_1" "2021_BOST_A_1" "2021_BOST_A_1" "2021_BOST_A_1" ...
## $ species_scientific: chr [1:2596] "Teleostei" "Agonidae" "Ammodytidae" "Stichaeidae" ...
## $ binary : num [1:2596] 1 0 0 0 0 0 0 0 0 0 ...
edna.long$species_scientific <- gsub("_", " ", edna.long$species_scientific)
# check the species in seines and eDNA
levels(as.factor(fish$species_scientific))
## [1] "Agonidae" "Ammodytidae"
## [3] "Aulorhynchus flavidus" "Blepsias cirrhosus"
## [5] "Citharichthys sordidus" "Citharichthys stigmaeus"
## [7] "Clupidae" "Cottidae"
## [9] "Embiotocidae" "Gadidae"
## [11] "Gasterosteus" "Hexagrammidae"
## [13] "Lepidogobius lepidus" "Nautichthys oculofasciatus"
## [15] "Pholidae" "Pleuronectidae"
## [17] "Rhinogobiops nicholsii" "Ronquilus jordani"
## [19] "Salmonidae" "Sebastidae"
## [21] "Stichaeidae" "Syngnathus leptorhynchus"
## [23] "Teleostei"
levels(as.factor(edna.long$species_scientific))
## [1] "Agonidae" "Ammodytidae"
## [3] "Anoplopoma fimbria" "Aulorhynchus flavidus"
## [5] "Brosmophycis marginata" "Citharichthys sordidus"
## [7] "Citharichthys stigmaeus" "Clupeidae"
## [9] "Cottidae" "Embiotocidae"
## [11] "Gadidae" "Gasterosteus"
## [13] "Gobiesox maeandricus" "Hexagrammidae"
## [15] "Lepidogobius lepidus" "Liparis fucensis"
## [17] "Nautichthys oculofasciatus" "Pholidae"
## [19] "Pleuronectidae" "Polypera greeni"
## [21] "Rhamphocottus richardsonii" "Salmonidae"
## [23] "Sebastidae" "Stichaeidae"
## [25] "Syngnathus leptorhynchus" "Teleostei"
Okay so now we’ve figured out where these species can overlap and “rounded up” taxonomically for instances where eDNA only was able to identify species to the genus or family level - lets combine the data frames
glimpse(fish)
## Rows: 2,594
## Columns: 16
## $ bay_id <chr> "SYLB_A", "SYLB_A", "SYLB_A", "SYLB_A", "SYLB_A", "…
## $ place_name <chr> "Sylburn harbor", "Sylburn harbor", "Sylburn harbor…
## $ date <chr> "6/8/2021", "6/8/2021", "6/8/2021", "6/8/2021", "6/…
## $ YYYYMMDD <int> 20210608, 20210608, 20210608, 20210608, 20210608, 2…
## $ start_time <int> 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 6…
## $ end_time <int> 630, 630, 630, 630, 630, 630, 630, 630, 630, 630, 6…
## $ slope <chr> "steep", "steep", "steep", "steep", "steep", "steep…
## $ tide_height <dbl> -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.…
## $ tide_time <int> 632, 632, 632, 632, 632, 632, 632, 632, 632, 632, 6…
## $ species_common <chr> "Pink salmon", "Pink salmon", "Chum salmon", "Snake…
## $ species_scientific <fct> Salmonidae, Salmonidae, Salmonidae, Stichaeidae, St…
## $ sp_code <chr> "SALPINK", "SALPINK", "SALCHUM", "PRICKSN", "PRICKS…
## $ taxon <chr> "vertebrate", "vertebrate", "vertebrate", "vertebra…
## $ abundance <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ sites <chr> "Sylburn harbor eel", "Sylburn harbor eel", "Sylbur…
## $ habitat <chr> "eelgrass", "eelgrass", "eelgrass", "eelgrass", "ee…
glimpse(edna.long)
## Rows: 2,596
## Columns: 3
## $ SampleCode_rep <chr> "2021_BOST_A_1", "2021_BOST_A_1", "2021_BOST_A_1", …
## $ species_scientific <chr> "Teleostei", "Agonidae", "Ammodytidae", "Stichaeida…
## $ binary <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
fish$gear <- "beach_seine"
seine.info <- fish %>%
dplyr::select(place_name:tide_time, bay_id, habitat) %>%
distinct()
sp.info <- fish %>%
dplyr::select(c(bay_id, place_name, habitat, species_common, species_scientific, sp_code))
fish$species_scientific <- as.character(fish$species_scientific)
fish2 <- fish %>%
dplyr::group_by(bay_id, sites, species_scientific) %>%
dplyr::mutate(counts = sum(abundance)) %>%
dplyr::select(-abundance) %>%
distinct() %>%
unite(bay_id_gear, c(bay_id, gear)) %>%
ungroup() %>%
distinct() %>%
dplyr::select(c(bay_id_gear, species_scientific, counts)) %>%
group_by(bay_id_gear, species_scientific) %>%
dplyr::summarise(counts2 = sum(counts)) %>%
mutate(binary = if_else(counts2 > 0, 1, 0)) %>%
dplyr::select(-counts2) %>%
distinct()
## `summarise()` has grouped output by 'bay_id_gear'. You can override using the
## `.groups` argument.
glimpse(fish2)
## Rows: 154
## Columns: 3
## Groups: bay_id_gear [16]
## $ bay_id_gear <chr> "BOST_A_beach_seine", "BOST_A_beach_seine", "BOST_A…
## $ species_scientific <chr> "Ammodytidae", "Cottidae", "Embiotocidae", "Pholida…
## $ binary <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
# options here when dealing with the eDNA.long, we can either keep the field replicates separate (as the following) or we can combine the replicates together
edna.by.rep <- edna.long %>%
separate(SampleCode_rep, into = c("year", "bay_code", "bay_letter", "rep")) %>%
unite(bay_id, bay_code:bay_letter) %>%
group_by(bay_id, species_scientific, rep) %>%
dplyr::mutate(total = sum(binary)) %>%
dplyr::select(-binary) %>%
mutate(binary = if_else(total > 0, 1, 0)) %>%
dplyr::select(-total) %>%
distinct()
edna.by.rep %>%
group_by(bay_id, species_scientific) %>%
dplyr::summarize(total = sum(binary)) %>%
ggplot() +
geom_tile(aes(x = bay_id, y = reorder(species_scientific, -total), fill = total)) +
plot_theme() +
theme(axis.text.x = element_text(angle = 45))
## `summarise()` has grouped output by 'bay_id'. You can override using the
## `.groups` argument.
edna.by.rep$gear <- "eDNA"
edna.by.site <- edna.by.rep %>%
group_by(bay_id, species_scientific, gear) %>%
dplyr::summarise(total = sum(binary)) %>%
distinct() %>%
mutate(binary = if_else(total > 0, 1, 0)) %>%
dplyr::select(-total) %>%
distinct() %>%
unite(bay_id_gear, c(bay_id, gear))
## `summarise()` has grouped output by 'bay_id', 'species_scientific'. You can
## override using the `.groups` argument.
# combine beach seine and eDNA data, by site by species and pivot wide
full.dat <- rbind(fish2, edna.by.site) %>%
pivot_wider(names_from = "species_scientific", values_from = "binary", values_fill = 0) %>%
column_to_rownames("bay_id_gear")
head(full.dat)
## Ammodytidae Cottidae Embiotocidae Pholidae
## BOST_A_beach_seine 1 1 1 1
## BOST_B_beach_seine 0 1 1 1
## DALL_A_beach_seine 0 1 1 1
## DALL_B_beach_seine 1 1 1 1
## HUNT_A_beach_seine 0 1 1 1
## KAHS_A_beach_seine 0 1 1 1
## Syngnathus leptorhynchus Aulorhynchus flavidus Hexagrammidae
## BOST_A_beach_seine 1 0 0
## BOST_B_beach_seine 1 1 1
## DALL_A_beach_seine 1 1 1
## DALL_B_beach_seine 0 0 1
## HUNT_A_beach_seine 1 0 0
## KAHS_A_beach_seine 1 1 1
## Pleuronectidae Salmonidae Citharichthys stigmaeus Gadidae
## BOST_A_beach_seine 0 0 0 0
## BOST_B_beach_seine 1 1 0 0
## DALL_A_beach_seine 1 1 1 1
## DALL_B_beach_seine 0 1 0 0
## HUNT_A_beach_seine 0 1 0 0
## KAHS_A_beach_seine 1 1 0 1
## Gasterosteus Stichaeidae Teleostei Clupidae Sebastidae
## BOST_A_beach_seine 0 0 0 0 0
## BOST_B_beach_seine 0 0 0 0 0
## DALL_A_beach_seine 1 1 1 0 0
## DALL_B_beach_seine 1 0 0 1 1
## HUNT_A_beach_seine 1 1 0 0 0
## KAHS_A_beach_seine 1 0 0 0 1
## Agonidae Citharichthys sordidus Blepsias cirrhosus
## BOST_A_beach_seine 0 0 0
## BOST_B_beach_seine 0 0 0
## DALL_A_beach_seine 0 0 0
## DALL_B_beach_seine 0 0 0
## HUNT_A_beach_seine 1 1 0
## KAHS_A_beach_seine 0 0 1
## Rhinogobiops nicholsii Ronquilus jordani
## BOST_A_beach_seine 0 0
## BOST_B_beach_seine 0 0
## DALL_A_beach_seine 0 0
## DALL_B_beach_seine 0 0
## HUNT_A_beach_seine 0 0
## KAHS_A_beach_seine 0 0
## Nautichthys oculofasciatus Lepidogobius lepidus
## BOST_A_beach_seine 0 0
## BOST_B_beach_seine 0 0
## DALL_A_beach_seine 0 0
## DALL_B_beach_seine 0 0
## HUNT_A_beach_seine 0 0
## KAHS_A_beach_seine 0 0
## Anoplopoma fimbria Brosmophycis marginata Clupeidae
## BOST_A_beach_seine 0 0 0
## BOST_B_beach_seine 0 0 0
## DALL_A_beach_seine 0 0 0
## DALL_B_beach_seine 0 0 0
## HUNT_A_beach_seine 0 0 0
## KAHS_A_beach_seine 0 0 0
## Gobiesox maeandricus Liparis fucensis Polypera greeni
## BOST_A_beach_seine 0 0 0
## BOST_B_beach_seine 0 0 0
## DALL_A_beach_seine 0 0 0
## DALL_B_beach_seine 0 0 0
## HUNT_A_beach_seine 0 0 0
## KAHS_A_beach_seine 0 0 0
## Rhamphocottus richardsonii
## BOST_A_beach_seine 0
## BOST_B_beach_seine 0
## DALL_A_beach_seine 0
## DALL_B_beach_seine 0
## HUNT_A_beach_seine 0
## KAHS_A_beach_seine 0
full.dat.long <- pivot_longer(data = full.dat, cols = c("Aulorhynchus flavidus":"Rhamphocottus richardsonii"), names_to = "species_scientific", values_to = "binary")
calculate distances for the combined eDNA/beach seine data
comb.jacc <- vegdist(full.dat, method = "jaccard", binary = T)
plot(comb.jacc)
# for just eelgrass sites
full.eel <- full.dat %>%
rownames_to_column(var = "bay_id_gear") %>%
separate("bay_id_gear", into = c("bay", "id", "gear"), extra = "merge") %>%
unite(bay_id, bay:id) %>%
left_join(seine.info) %>%
filter(habitat != "kelp") %>%
unite(bay_id_gear, bay_id:gear) %>%
column_to_rownames(var = "bay_id_gear") %>%
dplyr::select(-c(place_name:habitat))
## Joining, by = "bay_id"
full.kelp <- full.dat %>%
rownames_to_column(var = "bay_id_gear") %>%
separate("bay_id_gear", into = c("bay", "id", "gear"), extra = "merge") %>%
unite(bay_id, bay:id) %>%
left_join(seine.info) %>%
filter(habitat == "kelp") %>%
unite(bay_id_gear, bay_id:gear) %>%
column_to_rownames(var = "bay_id_gear") %>%
dplyr::select(-c(place_name:habitat))
## Joining, by = "bay_id"
eelc.jacc <- vegdist(full.eel, method = "jaccard", binary = T)
plot(eelc.jacc)
kelpc.jacc <- vegdist(full.kelp, method = "jaccard", binary = T)
plot(kelpc.jacc)
looking at a metric approach
comb.nmds <- metaMDS(comb.jacc, k = 2, autotransform = FALSE, distance = "jaccard", maxit = 9999)
## Run 0 stress 0.2201753
## Run 1 stress 0.2231484
## Run 2 stress 0.2256415
## Run 3 stress 0.2217775
## Run 4 stress 0.215838
## ... New best solution
## ... Procrustes: rmse 0.05748619 max resid 0.3072469
## Run 5 stress 0.2229849
## Run 6 stress 0.2234483
## Run 7 stress 0.2340127
## Run 8 stress 0.2488474
## Run 9 stress 0.2203582
## Run 10 stress 0.215838
## ... Procrustes: rmse 5.323036e-06 max resid 1.381031e-05
## ... Similar to previous best
## Run 11 stress 0.2274133
## Run 12 stress 0.2274133
## Run 13 stress 0.2473649
## Run 14 stress 0.215838
## ... New best solution
## ... Procrustes: rmse 3.892696e-06 max resid 1.212149e-05
## ... Similar to previous best
## Run 15 stress 0.2267478
## Run 16 stress 0.215838
## ... Procrustes: rmse 1.002353e-05 max resid 2.993802e-05
## ... Similar to previous best
## Run 17 stress 0.2229802
## Run 18 stress 0.2228821
## Run 19 stress 0.2291645
## Run 20 stress 0.239331
## *** Solution reached
comb.nmds$stress # 21% kinda almost past what we should accept
## [1] 0.215838
stressplot(comb.nmds)
comb.nmds.pts <- as.data.frame(comb.nmds$points)
comb.nmds.pts %>%
rownames_to_column(var = "bay_id_gear") %>%
separate(bay_id_gear, into = c("bay", "id", "gear"), extra = "merge") %>%
unite(bay_id, bay:id) %>%
ggplot(aes(x = MDS1, y = MDS2, color = bay_id)) +
geom_point() +
geom_line()
##PCoA
pco.combo <- cmdscale(comb.jacc, k = 2, eig = T, add = T)
colnames(pco.combo$points) <- c("pcoa1", "pcoa2")
# eiganvector - positions on the x and y axis (points)
# eiganvalues - indicate something about the proportion of the variation explained by the different axis. So we can use eiganvalues to get porpotion explained. Some of these eiganvalues are negative. Need to be rescaled (this is done by add = T)
percent_explained3 <- 100* pco.combo$eig/ sum(pco.combo$eig)
pe3 <- format(round(percent_explained3[1:2], digits = 1), nsmall = 1, trim = T)
pco.perc <- c(glue("PCoA 1 ({pe3[1]}%)"),
glue("PcoA 2 ({pe3[2]}%)"))
pco.combo$points %>%
as_tibble(rownames = "bay_id_gear") %>%
separate(bay_id_gear, into = c("bay", "id", "gear", "extra"), extra = "merge") %>%
dplyr::select(-extra) %>%
unite(bay_id, bay:id) %>%
left_join(seine.info) %>%
ggplot(aes(x = pcoa1, y = pcoa2)) +
geom_point(aes(color = as.factor(habitat))) +
labs(x = pco.perc[1], y = pco.perc[2])
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 20 rows [17, 18,
## 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36].
## Joining, by = "bay_id"
Lets look at just the eelgrass sites and see how the variability by gear type shows up.
# eelc.jacc - jaccard distance eelgrass only sites (includes mixed eelgrass)
eelc.pco <- cmdscale(eelc.jacc, k =2, eig = T, add = T)
colnames(eelc.pco$points) <- c("pcoa1", "pcoa2")
percent_explained4 <- 100* eelc.pco$eig/ sum(eelc.pco$eig)
pe4 <- format(round(percent_explained4[1:2], digits = 1), nsmall = 1, trim = T)
pco.perc4 <- c(glue("PCoA 1 ({pe4[1]}%)"),
glue("PcoA 2 ({pe4[2]}%)"))
eelc.pco$points %>%
as_tibble(rownames = "bay_id_gear") %>%
separate(bay_id_gear, into = c("bay", "id", "gear", "extra"), extra = "merge") %>%
dplyr::select(-extra) %>%
unite(bay_id, bay:id) %>%
left_join(seine.info) %>%
ggplot(aes(x = pcoa1, y = pcoa2)) +
geom_point(aes(color = gear)) +
labs(x = pco.perc4[1], y = pco.perc4[2], main = "PCoA of eelgrass sites sampled with beach seine or eDNA") +
stat_ellipse(aes(color = gear))
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 9 rows [10, 11,
## 12, 13, 14, 15, 16, 17, 18].
## Joining, by = "bay_id"
Using nmds approaches, we can also test for homogeneity of dispersion and differences between species comp of group.
Randomization test to compare within-group ‘dispersions’ with any distance measure. This tells you if the differences between sites are because of dispersion or variation or if it truly driven by a difference in sites (as can be determined by a PERMANOVA test)
H0 = within-group dispersion have no significant differences.
# extract gear info from distance matrix
gear.eel <- full.eel %>%
rownames_to_column(var = "bay_id_gear") %>%
separate(bay_id_gear, into = c("bay", "id", "gear"), extra = "merge") %>%
dplyr::select(c(bay, id, gear)) %>%
unite(bay_id, bay:id)
disp.gear <- betadisper(eelc.jacc, gear.eel$gear)
anova(disp.gear)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.013085 0.0130849 2.1579 0.1612
## Residuals 16 0.097019 0.0060637
H0 accepted, there is no significant variation within gear type groups
PERMANOVA: Then test for significant difference in species composition between gear
If the betadisper/analysis of dispersion null hypothesis is REJECTED then any difference in the PERMANOVA analysis between groups - could be driven by this difference in dispersion rather than a true difference in species composition between groups (in this case years).
PERMANOVA assumption: 1) independence 2) homogeneity of dispersion Meaning samples/objects are exchangeable, if H0 is true. Always test diff in dispersion among groups FIRST (done above w/ betadisper)
# analysis of dispersion accepted the null hypothesis
# This is the test of "location", if there are sig. changes in species composition between gear groups
# H0 no group differences
adonis2(eelc.jacc ~ gear, data = gear.eel)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = eelc.jacc ~ gear, data = gear.eel)
## Df SumOfSqs R2 F Pr(>F)
## gear 1 0.50519 0.20871 4.2201 0.001 ***
## Residual 16 1.91535 0.79129
## Total 17 2.42053 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
There is a significant variation in species composition betweeen gear type when comparing beach seine and eDNA. This can be visualized using stat_ellipse in ggplot in the biplot of nmds below.
eel.nmds <- metaMDS(eelc.jacc, k = 2, autotransform = FALSE, distance = "jaccard", maxit = 9999)
## Run 0 stress 0.168232
## Run 1 stress 0.1918953
## Run 2 stress 0.168232
## ... Procrustes: rmse 6.950192e-06 max resid 1.525616e-05
## ... Similar to previous best
## Run 3 stress 0.1934766
## Run 4 stress 0.168232
## ... Procrustes: rmse 1.236767e-05 max resid 2.069534e-05
## ... Similar to previous best
## Run 5 stress 0.168232
## ... Procrustes: rmse 2.125935e-05 max resid 4.581788e-05
## ... Similar to previous best
## Run 6 stress 0.1918952
## Run 7 stress 0.1814245
## Run 8 stress 0.1934766
## Run 9 stress 0.1923123
## Run 10 stress 0.193536
## Run 11 stress 0.168232
## ... Procrustes: rmse 8.572848e-06 max resid 1.868903e-05
## ... Similar to previous best
## Run 12 stress 0.168232
## ... Procrustes: rmse 1.453763e-05 max resid 2.626865e-05
## ... Similar to previous best
## Run 13 stress 0.193536
## Run 14 stress 0.168232
## ... New best solution
## ... Procrustes: rmse 2.991261e-06 max resid 5.55058e-06
## ... Similar to previous best
## Run 15 stress 0.168232
## ... Procrustes: rmse 1.667731e-05 max resid 3.299933e-05
## ... Similar to previous best
## Run 16 stress 0.1738561
## Run 17 stress 0.168232
## ... New best solution
## ... Procrustes: rmse 2.528828e-06 max resid 5.232736e-06
## ... Similar to previous best
## Run 18 stress 0.1918952
## Run 19 stress 0.168232
## ... Procrustes: rmse 7.354389e-06 max resid 1.593438e-05
## ... Similar to previous best
## Run 20 stress 0.168232
## ... New best solution
## ... Procrustes: rmse 3.686208e-06 max resid 7.045463e-06
## ... Similar to previous best
## *** Solution reached
eel.nmds$stress # 16% this is good
## [1] 0.168232
stressplot(eel.nmds)
eel.nmds.pts <- as.data.frame(eel.nmds$points)
eel.nmds.pts %>%
rownames_to_column(var = "bay_id_gear") %>%
separate(bay_id_gear, into = c("bay", "id", "gear"), extra = "merge") %>%
unite(bay_id, bay:id) %>%
ggplot(aes(x = MDS1, y = MDS2, color = gear)) +
geom_point() +
stat_ellipse(aes(color = gear))
plot(eel.nmds.pts)
ordispider(eel.nmds, gear.eel$gear, display = "sites")
What is driving the variability/difference in species composition between gear types for the eelgrass sites? We can use simper to look at this
From help documentation - “method gives the contribution of each species to overall dissimilarities, but these are caused by variation in species abundances, and only partly by diff. among groups.”
sim.eel <- simper(full.eel, gear.eel$gear, permutations = 999)
summary(sim.eel)
##
## Contrast: beach_seine_eDNA
##
## average sd ratio ava avb cumsum p
## Clupeidae 0.047102 0.009441 4.9892 0.0000 1.0000 0.1187 0.001
## Ammodytidae 0.027376 0.023868 1.1470 0.2222 0.6667 0.1877 0.106
## Citharichthys stigmaeus 0.025389 0.024330 1.0435 0.1111 0.5556 0.2517 0.023
## Pleuronectidae 0.024885 0.024764 1.0049 0.4444 0.6667 0.3145 0.240
## Teleostei 0.023504 0.022110 1.0631 0.1111 0.5556 0.3737 0.077
## Gadidae 0.023494 0.023414 1.0034 0.5556 0.3333 0.4329 0.596
## Syngnathus leptorhynchus 0.022533 0.026284 0.8573 1.0000 0.5556 0.4897 0.003
## Sebastidae 0.021895 0.023416 0.9351 0.3333 0.4444 0.5449 0.794
## Gasterosteus 0.021511 0.025013 0.8600 0.6667 0.6667 0.5991 0.961
## Aulorhynchus flavidus 0.020862 0.022863 0.9125 0.4444 0.2222 0.6517 0.573
## Stichaeidae 0.018739 0.025263 0.7418 0.6667 0.8889 0.6989 0.515
## Salmonidae 0.018347 0.024970 0.7347 0.6667 0.8889 0.7452 0.375
## Hexagrammidae 0.017608 0.024965 0.7053 0.7778 0.7778 0.7896 0.959
## Lepidogobius lepidus 0.016287 0.021738 0.7492 0.1111 0.3333 0.8306 0.169
## Blepsias cirrhosus 0.014677 0.021345 0.6876 0.3333 0.0000 0.8676 0.209
## Citharichthys sordidus 0.014205 0.020582 0.6902 0.3333 0.0000 0.9034 0.207
## Polypera greeni 0.014119 0.020346 0.6939 0.0000 0.3333 0.9390 0.060
## Agonidae 0.009706 0.018595 0.5220 0.2222 0.0000 0.9635 0.431
## Pholidae 0.005664 0.016302 0.3474 0.8889 1.0000 0.9777 0.889
## Clupidae 0.004499 0.012895 0.3489 0.1111 0.0000 0.9891 0.947
## Liparis fucensis 0.004329 0.012411 0.3488 0.0000 0.1111 1.0000 0.138
## Cottidae 0.000000 0.000000 NaN 1.0000 1.0000 1.0000 1.000
## Embiotocidae 0.000000 0.000000 NaN 1.0000 1.0000 1.0000 1.000
## Rhinogobiops nicholsii 0.000000 0.000000 NaN 0.0000 0.0000 1.0000 1.000
## Ronquilus jordani 0.000000 0.000000 NaN 0.0000 0.0000 1.0000 1.000
## Nautichthys oculofasciatus 0.000000 0.000000 NaN 0.0000 0.0000 1.0000 1.000
## Anoplopoma fimbria 0.000000 0.000000 NaN 0.0000 0.0000 1.0000 1.000
## Brosmophycis marginata 0.000000 0.000000 NaN 0.0000 0.0000 1.0000 1.000
## Gobiesox maeandricus 0.000000 0.000000 NaN 0.0000 0.0000 1.0000 1.000
## Rhamphocottus richardsonii 0.000000 0.000000 NaN 0.0000 0.0000 1.0000 1.000
##
## Clupeidae ***
## Ammodytidae
## Citharichthys stigmaeus *
## Pleuronectidae
## Teleostei .
## Gadidae
## Syngnathus leptorhynchus **
## Sebastidae
## Gasterosteus
## Aulorhynchus flavidus
## Stichaeidae
## Salmonidae
## Hexagrammidae
## Lepidogobius lepidus
## Blepsias cirrhosus
## Citharichthys sordidus
## Polypera greeni .
## Agonidae
## Pholidae
## Clupidae
## Liparis fucensis
## Cottidae
## Embiotocidae
## Rhinogobiops nicholsii
## Ronquilus jordani
## Nautichthys oculofasciatus
## Anoplopoma fimbria
## Brosmophycis marginata
## Gobiesox maeandricus
## Rhamphocottus richardsonii
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
We can also look at species that are significantly correlated with the nmds axes. Because it looks like the variability between gear type falls along the first axis can look at what species drive that seperation of gear type
# create df of scores across some variable (habitat or habitat richness, year, julian etc)
scrs <- as.data.frame(scores(eel.nmds, display = 'sites'))
scrs <- cbind(as.data.frame(scrs), gear = gear.eel$gear)
cent <- aggregate(cbind(NMDS1, NMDS2) ~ gear, data =scrs, FUN = mean)
segs <- merge(scrs, setNames(cent, c('gear', 'oNMDS1', 'oNMDS2')),
by = "gear", sort = FALSE)
# create species vectors correlated w/ nmds axis - untransformed data
vec.sp <- envfit(eel.nmds$points, full.eel[,1:30], perm = 999)
vec.sp.arrows <- scores(vec.sp, display = "vectors")
vec.sp.dat <- data.frame(vec.sp.arrows)
vec.sp.dat$pvals <- vec.sp$vectors$pvals
vec.sp.dat$sp_code <- row.names(vec.sp.dat)
vec.sp.dat$r2 <- vec.sp$vectors$r
# filter by species that are sig correlated w/ nmds axis
vec.sp.dat.sig <- vec.sp.dat %>%
filter(pvals < 0.05)
vec.sp.dat.sig <- vec.sp.dat.sig %>%
dplyr::select(MDS1, MDS2, pvals, sp_code, r2)
# scale by nmds?
scalefactor <- min(max(scrs$NMDS1) - min(scrs$NMDS1), max(scrs$NMDS2) - min(scrs$NMDS2))
vec.sp.dat.sig$MDS1_sc <- vec.sp.dat.sig$MDS1 * (scalefactor * vec.sp.dat.sig$r2)
vec.sp.dat.sig$MDS2_sc <- vec.sp.dat.sig$MDS2 * (scalefactor * vec.sp.dat.sig$r2)
Visualize the nmds w/ beach seine eDNA data with species vectors
Figure 14: Nonmetric multidimensional scaling of species communities with eelgrass meadows. Color represents the gear type used to sample the community - either beach seine or environmental DNA. Species vectors are species that are significnatly correlated with the NMDS axes and length represents the strength of that correlation (r2)
Based on both the simper and correlation analyses (using envfit) it looks like herring (clupeidae) and bay pipefish (Syngnathus leptorhynchus) are the species that are strongly correlated with the first axis of the nmds. With hering being more strongly correlated with the eDNA samples and bay pipefish more strongly correlated with beach seine methods. Citharicthys sitgmaeus (speckled sanddab) were more associated with eDNA than beach seine.
Why do I think this? Well for instance, groups of herring might be present nearby the site that was sampled but because the beach seines can only sample a limited area perhaps the group of herring were just missed. Similar with the flatfishes (speckled sanddab) especially since they are bottom associated; however, they are found in some seines so that doesn’t explain it fully.
What is particular interesting is that bay pipefish separate the eDNA and beach seine - and are found in seines when they are not found in eDNA. Could this be related to the fact that they are more closely related to sea horses and have closer to plates rather than scales - less slime?
# kelpc.jacc - jaccard distance kelp only sites
kelpc.pco <- cmdscale(kelpc.jacc, k =2, eig = T, add = T)
colnames(kelpc.pco$points) <- c("pcoa1", "pcoa2")
percent_explained5 <- 100* kelpc.pco$eig/ sum(kelpc.pco$eig)
pe5 <- format(round(percent_explained5[1:2], digits = 1), nsmall = 1, trim = T)
pco.perc5 <- c(glue("PCoA 1 ({pe5[1]}%)"),
glue("PcoA 2 ({pe5[2]}%)"))
kelpc.pco$points %>%
as_tibble(rownames = "bay_id_gear") %>%
separate(bay_id_gear, into = c("bay", "id", "gear", "extra"), extra = "merge") %>%
dplyr::select(-extra) %>%
unite(bay_id, bay:id) %>%
left_join(seine.info) %>%
ggplot(aes(x = pcoa1, y = pcoa2)) +
geom_point(aes(color = gear)) +
labs(x = pco.perc5[1], y = pco.perc5[2]) +
stat_ellipse(aes(color = gear))
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 7 rows [8, 9, 10,
## 11, 12, 13, 14].
## Joining, by = "bay_id"
definitely looks like its seperating between gear type along the first axis
Using nmds approaches, we can also test for homogeneity of dispersion and differences between species comp of group.
Randomization test to compare within-group ‘dispersions’ with any distance measure. This tells you if the differences between sites are because of dispersion or variation or if it truly driven by a difference in sites (as can be determined by a PERMANOVA test)
H0 = within-group dispersion have no significant differences.
# extract gear info from distance matrix
gear.kelp <- full.kelp %>%
rownames_to_column(var = "bay_id_gear") %>%
separate(bay_id_gear, into = c("bay", "id", "gear"), extra = "merge") %>%
dplyr::select(c(bay, id, gear)) %>%
unite(bay_id, bay:id)
disp.gear2 <- betadisper(kelpc.jacc, gear.kelp$gear)
anova(disp.gear2)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.011081 0.0110812 5.331 0.03955 *
## Residuals 12 0.024943 0.0020786
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
H0 rejected at pvalue 0.05, there is significant variation within gear type groups - so any conclusions from permanova are conflated by the variation within gear type group.
PERMANOVA: Then test for significant difference in species composition between gear
If the betadisper/analysis of dispersion null hypothesis is REJECTED then any difference in the PERMANOVA analysis between groups - could be driven by this difference in dispersion rather than a true difference in species composition between groups (in this case years).
PERMANOVA assumption: 1) independence 2) homogeneity of dispersion Meaning samples/objects are exchangeable, if H0 is true. Always test diff in dispersion among groups FIRST (done above w/ betadisper)
# analysis of dispersion accepted the null hypothesis
# This is the test of "location", if there are sig. changes in species composition between gear groups
# H0 no group differences
adonis2(kelpc.jacc ~ gear, data = gear.kelp)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = kelpc.jacc ~ gear, data = gear.kelp)
## Df SumOfSqs R2 F Pr(>F)
## gear 1 0.59855 0.23328 3.6511 0.003 **
## Residual 12 1.96726 0.76672
## Total 13 2.56581 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
There is a significant variation by gear type but it is conflated with the within group variation. Can still visualize this using ellipse below:
kelp.nmds <- metaMDS(kelpc.jacc, k = 2, autotransform = FALSE, distance = "jaccard", maxit = 9999)
## Run 0 stress 0.1767779
## Run 1 stress 0.1797712
## Run 2 stress 0.1941147
## Run 3 stress 0.2656931
## Run 4 stress 0.193979
## Run 5 stress 0.2538835
## Run 6 stress 0.1901168
## Run 7 stress 0.1834555
## Run 8 stress 0.1834555
## Run 9 stress 0.1828737
## Run 10 stress 0.1901165
## Run 11 stress 0.1925906
## Run 12 stress 0.200932
## Run 13 stress 0.1901165
## Run 14 stress 0.2068808
## Run 15 stress 0.2029652
## Run 16 stress 0.1877777
## Run 17 stress 0.190912
## Run 18 stress 0.1901169
## Run 19 stress 0.2579642
## Run 20 stress 0.1939262
## *** No convergence -- monoMDS stopping criteria:
## 18: stress ratio > sratmax
## 2: scale factor of the gradient < sfgrmin
kelp.nmds$stress # 17.6% this is good
## [1] 0.1767779
stressplot(kelp.nmds)
kelp.nmds.pts <- as.data.frame(kelp.nmds$points)
kelp.nmds.pts %>%
rownames_to_column(var = "bay_id_gear") %>%
separate(bay_id_gear, into = c("bay", "id", "gear"), extra = "merge") %>%
unite(bay_id, bay:id) %>%
ggplot(aes(x = MDS1, y = MDS2, color = gear)) +
geom_point() +
stat_ellipse(aes(color = gear))
plot(kelp.nmds.pts)
ordispider(kelp.nmds, gear.kelp$gear, display = "sites")
What is driving the variability/difference in species composition between gear types for the understory kelp sites? We can use simper to look at this
From help documentation - “method gives the contribution of each species to overall dissimilarities, but these are caused by variation in species abundances, and only partly by diff. among groups.”
sim.kelp <- simper(full.kelp, gear.kelp$gear, permutations = 999)
summary(sim.kelp)
##
## Contrast: beach_seine_eDNA
##
## average sd ratio ava avb cumsum p
## Clupeidae 0.051484 0.01206 4.2694 0.0000 1.0000 0.1016 0.002
## Stichaeidae 0.038255 0.02670 1.4330 0.2857 1.0000 0.1772 0.015
## Hexagrammidae 0.030809 0.02884 1.0682 0.8571 0.4286 0.2380 0.032
## Ammodytidae 0.027955 0.02804 0.9970 0.4286 0.7143 0.2932 0.261
## Citharichthys stigmaeus 0.025298 0.02376 1.0646 0.1429 0.5714 0.3431 0.256
## Syngnathus leptorhynchus 0.024297 0.02761 0.8799 0.4286 0.2857 0.3911 0.446
## Gadidae 0.024051 0.02711 0.8871 0.4286 0.2857 0.4386 0.830
## Salmonidae 0.023516 0.02882 0.8160 0.5714 1.0000 0.4850 0.135
## Pleuronectidae 0.022988 0.02549 0.9018 0.4286 0.2857 0.5304 0.592
## Sebastidae 0.022074 0.02794 0.7902 0.7143 0.7143 0.5740 0.880
## Gasterosteus 0.020520 0.02658 0.7721 0.2857 0.2857 0.6145 0.611
## Polypera greeni 0.020005 0.02373 0.8431 0.0000 0.4286 0.6540 0.082
## Blepsias cirrhosus 0.019542 0.02340 0.8350 0.4286 0.0000 0.6926 0.188
## Gobiesox maeandricus 0.019075 0.02286 0.8346 0.0000 0.4286 0.7302 0.104
## Teleostei 0.017745 0.02094 0.8473 0.0000 0.4286 0.7653 0.145
## Embiotocidae 0.017713 0.02503 0.7077 0.8571 0.7143 0.8003 0.240
## Aulorhynchus flavidus 0.016404 0.02358 0.6955 0.2857 0.1429 0.8326 0.538
## Nautichthys oculofasciatus 0.015094 0.02132 0.7081 0.1429 0.2857 0.8624 0.719
## Rhinogobiops nicholsii 0.014803 0.02450 0.6042 0.2857 0.0000 0.8917 0.375
## Agonidae 0.012928 0.02109 0.6131 0.2857 0.0000 0.9172 0.431
## Anoplopoma fimbria 0.011520 0.01858 0.6199 0.0000 0.2857 0.9399 0.215
## Cottidae 0.009624 0.02410 0.3993 1.0000 0.8571 0.9589 0.055
## Ronquilus jordani 0.008189 0.02081 0.3935 0.1429 0.0000 0.9751 0.760
## Clupidae 0.007315 0.01850 0.3955 0.1429 0.0000 0.9895 0.810
## Rhamphocottus richardsonii 0.005295 0.01315 0.4026 0.0000 0.1429 1.0000 0.291
## Pholidae 0.000000 0.00000 NaN 1.0000 1.0000 1.0000 1.000
## Citharichthys sordidus 0.000000 0.00000 NaN 0.0000 0.0000 1.0000 1.000
## Lepidogobius lepidus 0.000000 0.00000 NaN 0.0000 0.0000 1.0000 1.000
## Brosmophycis marginata 0.000000 0.00000 NaN 0.0000 0.0000 1.0000 1.000
## Liparis fucensis 0.000000 0.00000 NaN 0.0000 0.0000 1.0000 1.000
##
## Clupeidae **
## Stichaeidae *
## Hexagrammidae *
## Ammodytidae
## Citharichthys stigmaeus
## Syngnathus leptorhynchus
## Gadidae
## Salmonidae
## Pleuronectidae
## Sebastidae
## Gasterosteus
## Polypera greeni .
## Blepsias cirrhosus
## Gobiesox maeandricus
## Teleostei
## Embiotocidae
## Aulorhynchus flavidus
## Nautichthys oculofasciatus
## Rhinogobiops nicholsii
## Agonidae
## Anoplopoma fimbria
## Cottidae .
## Ronquilus jordani
## Clupidae
## Rhamphocottus richardsonii
## Pholidae
## Citharichthys sordidus
## Lepidogobius lepidus
## Brosmophycis marginata
## Liparis fucensis
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
We can also look at species that are significantly correlated with the nmds axes. Because it looks like the variability between gear type falls along the first axis can look at what species drive that separation of gear type
# create df of scores across some variable (habitat or habitat richness, year, julian etc)
scrs2 <- as.data.frame(scores(kelp.nmds, display = 'sites'))
scrs2 <- cbind(as.data.frame(scrs2), gear = gear.kelp$gear)
cent2 <- aggregate(cbind(NMDS1, NMDS2) ~ gear, data =scrs2, FUN = mean)
segs2 <- merge(scrs2, setNames(cent2, c('gear', 'oNMDS1', 'oNMDS2')),
by = "gear", sort = FALSE)
# create species vectors correlated w/ nmds axis - untransformed data
vec.sp2 <- envfit(kelp.nmds$points, full.kelp[,1:30], perm = 999)
vec.sp.arrows2 <- scores(vec.sp2, display = "vectors")
vec.sp.dat2 <- data.frame(vec.sp.arrows2)
vec.sp.dat2$pvals <- vec.sp2$vectors$pvals
vec.sp.dat2$sp_code <- row.names(vec.sp.dat2)
vec.sp.dat2$r2 <- vec.sp2$vectors$r
# filter by species that are sig correlated w/ nmds axis
vec.sp.dat.sig2 <- vec.sp.dat2 %>%
filter(pvals < 0.05)
vec.sp.dat.sig2 <- vec.sp.dat.sig2 %>%
dplyr::select(MDS1, MDS2, pvals, sp_code, r2)
# scale by nmds?
scalefactor2 <- min(max(scrs2$NMDS1) - min(scrs2$NMDS1), max(scrs2$NMDS2) - min(scrs2$NMDS2))
vec.sp.dat.sig2$MDS1_sc <- vec.sp.dat.sig2$MDS1 * (scalefactor2 * vec.sp.dat.sig2$r2)
vec.sp.dat.sig2$MDS2_sc <- vec.sp.dat.sig2$MDS2 * (scalefactor2 * vec.sp.dat.sig2$r2)
Visualize the nmds w/ beach seine eDNA data with species vectors for kelp sites
Figure 15: Nonmetric multidimensional scaling of species communities with understory kelp beds. Color represents the gear type used to sample the community - either beach seine or environmental DNA. Species vectors are species that are significnatly correlated with the NMDS axes and length represents the strength of that correlation (r2)
Looks like primarily herring and stichaeidae family sperating the eDNA and beach seining. With herring and stichaeidae (snake prickleback and slender cockscomb) being present in eDNA and not in seine. I dont have any great explainations for why stichaeidae drives the separation in community composition between the two… but it does.